diff --git a/clang/include/clang/AST/Attr.h b/clang/include/clang/AST/Attr.h
index 070e160d65170dbee92365472f6f1dd5e876f121..635f7a7fb165dc761f206e32069db0ac8dcaf299 100644
--- a/clang/include/clang/AST/Attr.h
+++ b/clang/include/clang/AST/Attr.h
@@ -25,6 +25,7 @@
 #include "clang/Basic/Sanitizers.h"
 #include "clang/Basic/SourceLocation.h"
 #include "llvm/ADT/StringSwitch.h"
+#include "llvm/Support/CodeGen.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/VersionTuple.h"
 #include "llvm/Support/raw_ostream.h"
diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td
index 19229687de7dea1659b82225f3fdb340e771ebc1..a5294cd9e60a83ff89538f15fcc7c7e645c425fc 100644
--- a/clang/include/clang/Basic/Attr.td
+++ b/clang/include/clang/Basic/Attr.td
@@ -133,6 +133,11 @@
 def SharedVar : SubsetSubject<Var,
                               [{S->hasGlobalStorage()}],
                               "global variables">;
+def NonTLSGlobalVar : SubsetSubject<Var,
+                             [{S->hasGlobalStorage() &&
+                               S->getTLSKind() == 0}],
+                             "non-TLS global variables">;
+
 def InlineFunction : SubsetSubject<Function,
                                    [{S->isInlineSpecified()}], "inline functions">;
@@ -257,23 +262,28 @@ class DefaultIntArgument<string name, int default> : IntArgument<name, 1> {
   int Default = default;
 }
 
-// This argument is more complex, it includes the enumerator type name,
-// a list of strings to accept, and a list of enumerators to map them to.
+// This argument is more complex, it includes the enumerator type
+// name, whether the enum type is externally defined, a list of
+// strings to accept, and a list of enumerators to map them to.
 class EnumArgument<string name, string type, list<string> values,
-                   list<string> enums, bit opt = 0, bit fake = 0>
+                   list<string> enums, bit opt = 0, bit fake = 0,
+                   bit isExternalType = 0>
     : Argument<name, opt, fake> {
   string Type = type;
   list<string> Values = values;
   list<string> Enums = enums;
+  bit IsExternalType = isExternalType;
 }
 
 // FIXME: There should be a VariadicArgument type that takes any other type
 // of argument and generates the appropriate type.
 class VariadicEnumArgument<string name, string type, list<string> values,
-                           list<string> enums> : Argument<name, 1> {
+                           list<string> enums, bit isExternalType = 0>
+    : Argument<name, 1> {
   string Type = type;
   list<string> Values = values;
   list<string> Enums = enums;
+  bit IsExternalType = isExternalType;
 }
 
 // This handles one spelling of an attribute.
@@ -392,6 +402,7 @@ def TargetAVR : TargetArch<["avr"]>;
 def TargetBPF : TargetArch<["bpfel", "bpfeb"]>;
 // OHOS_LOCAL
 def TargetXVM : TargetArch<["xvm"]>;
+def TargetLoongArch : TargetArch<["loongarch32", "loongarch64"]>;
 def TargetMips32 : TargetArch<["mips", "mipsel"]>;
 def TargetAnyMips : TargetArch<["mips", "mipsel", "mips64", "mips64el"]>;
 def TargetMSP430 : TargetArch<["msp430"]>;
@@ -2582,6 +2593,15 @@ def PragmaClangTextSection : InheritableAttr {
   let Documentation = [InternalOnly];
 }
 
+def CodeModel : InheritableAttr, TargetSpecificAttr<TargetLoongArch> {
+  let Spellings = [GCC<"model">];
+  let Args = [EnumArgument<"Model", "llvm::CodeModel::Model",
+              ["normal", "medium", "extreme"], ["Small", "Medium", "Large"],
+              /*opt=*/0, /*fake=*/0, /*isExternalType=*/1>];
+  let Subjects = SubjectList<[NonTLSGlobalVar], ErrorDiag>;
+  let Documentation = [CodeModelDocs];
+}
+
 def Sentinel : InheritableAttr {
   let Spellings = [GCC<"sentinel">];
   let Args = [DefaultIntArgument<"Sentinel", 0>,
diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td
index 06dd22833dae8e5290718a48b034da54c2199fef..ae11c8822be89fa158c7f3000f607216c2493f3d 100644
--- a/clang/include/clang/Basic/AttrDocs.td
+++ b/clang/include/clang/Basic/AttrDocs.td
@@ -57,6 +57,15 @@ global variable or function should be in after translation.
let Heading = "section, __declspec(allocate)"; } +def CodeModelDocs : Documentation { + let Category = DocCatVariable; + let Content = [{ +The ``model`` attribute allows overriding the translation unit's +code model (specified by ``-mcmodel``) for a specific global variable. + }]; + let Heading = "model"; +} + def UsedDocs : Documentation { let Category = DocCatFunction; let Content = [{ diff --git a/clang/include/clang/Basic/BuiltinsLoongArch.def b/clang/include/clang/Basic/BuiltinsLoongArch.def index 7f2c8403410dd3ef491148c89c70c082ffc6b4df..95359a3fdc711d4dd7ed15a9fb4ba481980af588 100644 --- a/clang/include/clang/Basic/BuiltinsLoongArch.def +++ b/clang/include/clang/Basic/BuiltinsLoongArch.def @@ -15,47 +15,14 @@ # define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) BUILTIN(ID, TYPE, ATTRS) #endif -// TODO: Support more builtins. -// TODO: Added feature constraints. -TARGET_BUILTIN(__builtin_loongarch_cacop_d, "vLiULiLi", "nc", "64bit") -TARGET_BUILTIN(__builtin_loongarch_cacop_w, "viUii", "nc", "32bit") -TARGET_BUILTIN(__builtin_loongarch_dbar, "vIUi", "nc", "") -TARGET_BUILTIN(__builtin_loongarch_ibar, "vIUi", "nc", "") -TARGET_BUILTIN(__builtin_loongarch_movfcsr2gr, "UiIUi", "nc", "f") -TARGET_BUILTIN(__builtin_loongarch_movgr2fcsr, "vIUiUi", "nc", "f") -TARGET_BUILTIN(__builtin_loongarch_break, "vIUi", "nc", "") -TARGET_BUILTIN(__builtin_loongarch_syscall, "vIUi", "nc", "") -TARGET_BUILTIN(__builtin_loongarch_cpucfg, "UiUi", "nc", "") -TARGET_BUILTIN(__builtin_loongarch_asrtle_d, "vLiLi", "nc", "64bit") -TARGET_BUILTIN(__builtin_loongarch_asrtgt_d, "vLiLi", "nc", "64bit") +// Definition of LoongArch basic builtins. +#include "clang/Basic/BuiltinsLoongArchBase.def" -TARGET_BUILTIN(__builtin_loongarch_crc_w_b_w, "iii", "nc", "64bit") -TARGET_BUILTIN(__builtin_loongarch_crc_w_h_w, "iii", "nc", "64bit") -TARGET_BUILTIN(__builtin_loongarch_crc_w_w_w, "iii", "nc", "64bit") -TARGET_BUILTIN(__builtin_loongarch_crc_w_d_w, "iLii", "nc", "64bit") -TARGET_BUILTIN(__builtin_loongarch_crcc_w_b_w, "iii", "nc", "64bit") -TARGET_BUILTIN(__builtin_loongarch_crcc_w_h_w, "iii", "nc", "64bit") -TARGET_BUILTIN(__builtin_loongarch_crcc_w_w_w, "iii", "nc", "64bit") -TARGET_BUILTIN(__builtin_loongarch_crcc_w_d_w, "iLii", "nc", "64bit") +// Definition of LSX builtins. +#include "clang/Basic/BuiltinsLoongArchLSX.def" -TARGET_BUILTIN(__builtin_loongarch_csrrd_w, "UiIUi", "nc", "") -TARGET_BUILTIN(__builtin_loongarch_csrrd_d, "ULiIUi", "nc", "64bit") -TARGET_BUILTIN(__builtin_loongarch_csrwr_w, "UiUiIUi", "nc", "") -TARGET_BUILTIN(__builtin_loongarch_csrwr_d, "ULiULiIUi", "nc", "64bit") -TARGET_BUILTIN(__builtin_loongarch_csrxchg_w, "UiUiUiIUi", "nc", "") -TARGET_BUILTIN(__builtin_loongarch_csrxchg_d, "ULiULiULiIUi", "nc", "64bit") - -TARGET_BUILTIN(__builtin_loongarch_iocsrrd_b, "UiUi", "nc", "") -TARGET_BUILTIN(__builtin_loongarch_iocsrrd_h, "UiUi", "nc", "") -TARGET_BUILTIN(__builtin_loongarch_iocsrrd_w, "UiUi", "nc", "") -TARGET_BUILTIN(__builtin_loongarch_iocsrrd_d, "ULiUi", "nc", "64bit") -TARGET_BUILTIN(__builtin_loongarch_iocsrwr_b, "vUiUi", "nc", "") -TARGET_BUILTIN(__builtin_loongarch_iocsrwr_h, "vUiUi", "nc", "") -TARGET_BUILTIN(__builtin_loongarch_iocsrwr_w, "vUiUi", "nc", "") -TARGET_BUILTIN(__builtin_loongarch_iocsrwr_d, "vULiUi", "nc", "64bit") - -TARGET_BUILTIN(__builtin_loongarch_lddir_d, "LiLiIULi", "nc", "64bit") -TARGET_BUILTIN(__builtin_loongarch_ldpte_d, "vLiIULi", "nc", "64bit") +// Definition of LASX builtins. 
+#include "clang/Basic/BuiltinsLoongArchLASX.def" #undef BUILTIN #undef TARGET_BUILTIN diff --git a/clang/include/clang/Basic/BuiltinsLoongArchBase.def b/clang/include/clang/Basic/BuiltinsLoongArchBase.def new file mode 100644 index 0000000000000000000000000000000000000000..cbb239223aae3b22e8ef15ee4627c60825aeea39 --- /dev/null +++ b/clang/include/clang/Basic/BuiltinsLoongArchBase.def @@ -0,0 +1,53 @@ +//============------------ BuiltinsLoongArchBase.def -------------*- C++ -*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the LoongArch-specific basic builtin function database. +// Users of this file must define the BUILTIN macro to make use of this +// information. +// +//===----------------------------------------------------------------------===// + +TARGET_BUILTIN(__builtin_loongarch_cacop_d, "vWiUWiWi", "nc", "64bit") +TARGET_BUILTIN(__builtin_loongarch_cacop_w, "viUii", "nc", "32bit") +TARGET_BUILTIN(__builtin_loongarch_dbar, "vIUi", "nc", "") +TARGET_BUILTIN(__builtin_loongarch_ibar, "vIUi", "nc", "") +TARGET_BUILTIN(__builtin_loongarch_movfcsr2gr, "UiIUi", "nc", "f") +TARGET_BUILTIN(__builtin_loongarch_movgr2fcsr, "vIUiUi", "nc", "f") +TARGET_BUILTIN(__builtin_loongarch_break, "vIUi", "nc", "") +TARGET_BUILTIN(__builtin_loongarch_syscall, "vIUi", "nc", "") +TARGET_BUILTIN(__builtin_loongarch_cpucfg, "UiUi", "nc", "") +TARGET_BUILTIN(__builtin_loongarch_asrtle_d, "vWiWi", "nc", "64bit") +TARGET_BUILTIN(__builtin_loongarch_asrtgt_d, "vWiWi", "nc", "64bit") + +TARGET_BUILTIN(__builtin_loongarch_crc_w_b_w, "iii", "nc", "64bit") +TARGET_BUILTIN(__builtin_loongarch_crc_w_h_w, "iii", "nc", "64bit") +TARGET_BUILTIN(__builtin_loongarch_crc_w_w_w, "iii", "nc", "64bit") +TARGET_BUILTIN(__builtin_loongarch_crc_w_d_w, "iWii", "nc", "64bit") +TARGET_BUILTIN(__builtin_loongarch_crcc_w_b_w, "iii", "nc", "64bit") +TARGET_BUILTIN(__builtin_loongarch_crcc_w_h_w, "iii", "nc", "64bit") +TARGET_BUILTIN(__builtin_loongarch_crcc_w_w_w, "iii", "nc", "64bit") +TARGET_BUILTIN(__builtin_loongarch_crcc_w_d_w, "iWii", "nc", "64bit") + +TARGET_BUILTIN(__builtin_loongarch_csrrd_w, "UiIUi", "nc", "") +TARGET_BUILTIN(__builtin_loongarch_csrrd_d, "UWiIUi", "nc", "64bit") +TARGET_BUILTIN(__builtin_loongarch_csrwr_w, "UiUiIUi", "nc", "") +TARGET_BUILTIN(__builtin_loongarch_csrwr_d, "UWiUWiIUi", "nc", "64bit") +TARGET_BUILTIN(__builtin_loongarch_csrxchg_w, "UiUiUiIUi", "nc", "") +TARGET_BUILTIN(__builtin_loongarch_csrxchg_d, "UWiUWiUWiIUi", "nc", "64bit") + +TARGET_BUILTIN(__builtin_loongarch_iocsrrd_b, "UiUi", "nc", "") +TARGET_BUILTIN(__builtin_loongarch_iocsrrd_h, "UiUi", "nc", "") +TARGET_BUILTIN(__builtin_loongarch_iocsrrd_w, "UiUi", "nc", "") +TARGET_BUILTIN(__builtin_loongarch_iocsrrd_d, "UWiUi", "nc", "64bit") +TARGET_BUILTIN(__builtin_loongarch_iocsrwr_b, "vUiUi", "nc", "") +TARGET_BUILTIN(__builtin_loongarch_iocsrwr_h, "vUiUi", "nc", "") +TARGET_BUILTIN(__builtin_loongarch_iocsrwr_w, "vUiUi", "nc", "") +TARGET_BUILTIN(__builtin_loongarch_iocsrwr_d, "vUWiUi", "nc", "64bit") + +TARGET_BUILTIN(__builtin_loongarch_lddir_d, "WiWiIUWi", "nc", "64bit") +TARGET_BUILTIN(__builtin_loongarch_ldpte_d, "vWiIUWi", "nc", "64bit") diff --git a/clang/include/clang/Basic/BuiltinsLoongArchLASX.def b/clang/include/clang/Basic/BuiltinsLoongArchLASX.def new 
file mode 100644 index 0000000000000000000000000000000000000000..3de200f665b680afdebc08a57c79a844a0783998 --- /dev/null +++ b/clang/include/clang/Basic/BuiltinsLoongArchLASX.def @@ -0,0 +1,982 @@ +//=BuiltinsLoongArchLASX.def - LoongArch Builtin function database -- C++ -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the LoongArch-specific LASX builtin function database. +// Users of this file must define the BUILTIN macro to make use of this +// information. +// +//===----------------------------------------------------------------------===// + +TARGET_BUILTIN(__builtin_lasx_xvadd_b, "V32cV32cV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvadd_h, "V16sV16sV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvadd_w, "V8iV8iV8i", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvadd_d, "V4LLiV4LLiV4LLi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvadd_q, "V4LLiV4LLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvsub_b, "V32cV32cV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsub_h, "V16sV16sV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsub_w, "V8iV8iV8i", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsub_d, "V4LLiV4LLiV4LLi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsub_q, "V4LLiV4LLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvaddi_bu, "V32cV32cIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvaddi_hu, "V16sV16sIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvaddi_wu, "V8iV8iIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvaddi_du, "V4LLiV4LLiIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvsubi_bu, "V32cV32cIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsubi_hu, "V16sV16sIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsubi_wu, "V8iV8iIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsubi_du, "V4LLiV4LLiIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvneg_b, "V32cV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvneg_h, "V16sV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvneg_w, "V8iV8i", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvneg_d, "V4LLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvsadd_b, "V32ScV32ScV32Sc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsadd_h, "V16SsV16SsV16Ss", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsadd_w, "V8SiV8SiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsadd_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvsadd_bu, "V32UcV32UcV32Uc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsadd_hu, "V16UsV16UsV16Us", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsadd_wu, "V8UiV8UiV8Ui", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsadd_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvssub_b, "V32ScV32ScV32Sc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssub_h, "V16SsV16SsV16Ss", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssub_w, "V8SiV8SiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssub_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvssub_bu, "V32UcV32UcV32Uc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssub_hu, "V16UsV16UsV16Us", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssub_wu, "V8UiV8UiV8Ui", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssub_du, 
"V4ULLiV4ULLiV4ULLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvhaddw_h_b, "V16SsV32ScV32Sc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvhaddw_w_h, "V8SiV16SsV16Ss", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvhaddw_d_w, "V4SLLiV8SiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvhaddw_q_d, "V4LLiV4LLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvhaddw_hu_bu, "V16UsV32UcV32Uc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvhaddw_wu_hu, "V8UiV16UsV16Us", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvhaddw_du_wu, "V4ULLiV8UiV8Ui", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvhaddw_qu_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvhsubw_h_b, "V16SsV32ScV32Sc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvhsubw_w_h, "V8SiV16SsV16Ss", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvhsubw_d_w, "V4SLLiV8SiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvhsubw_q_d, "V4LLiV4LLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvhsubw_hu_bu, "V16UsV32UcV32Uc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvhsubw_wu_hu, "V8UiV16UsV16Us", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvhsubw_du_wu, "V4ULLiV8UiV8Ui", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvhsubw_qu_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvaddwev_h_b, "V16sV32cV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvaddwev_w_h, "V8SiV16sV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvaddwev_d_w, "V4LLiV8SiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvaddwev_q_d, "V4LLiV4LLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvaddwod_h_b, "V16sV32cV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvaddwod_w_h, "V8SiV16sV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvaddwod_d_w, "V4LLiV8SiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvaddwod_q_d, "V4LLiV4LLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvsubwev_h_b, "V16sV32cV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsubwev_w_h, "V8SiV16sV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsubwev_d_w, "V4LLiV8SiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsubwev_q_d, "V4LLiV4LLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvsubwod_h_b, "V16sV32cV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsubwod_w_h, "V8SiV16sV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsubwod_d_w, "V4LLiV8SiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsubwod_q_d, "V4LLiV4LLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvaddwev_h_bu, "V16sV32UcV32Uc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvaddwev_w_hu, "V8SiV16UsV16Us", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvaddwev_d_wu, "V4LLiV8UiV8Ui", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvaddwev_q_du, "V4LLiV4ULLiV4ULLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvaddwod_h_bu, "V16sV32UcV32Uc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvaddwod_w_hu, "V8SiV16UsV16Us", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvaddwod_d_wu, "V4LLiV8UiV8Ui", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvaddwod_q_du, "V4LLiV4ULLiV4ULLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvsubwev_h_bu, "V16sV32UcV32Uc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsubwev_w_hu, "V8SiV16UsV16Us", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsubwev_d_wu, "V4LLiV8UiV8Ui", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsubwev_q_du, "V4LLiV4ULLiV4ULLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvsubwod_h_bu, "V16sV32UcV32Uc", "nc", "lasx") 
+TARGET_BUILTIN(__builtin_lasx_xvsubwod_w_hu, "V8SiV16UsV16Us", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsubwod_d_wu, "V4LLiV8UiV8Ui", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsubwod_q_du, "V4LLiV4ULLiV4ULLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvaddwev_h_bu_b, "V16sV32UcV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvaddwev_w_hu_h, "V8SiV16UsV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvaddwev_d_wu_w, "V4LLiV8UiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvaddwev_q_du_d, "V4LLiV4ULLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvaddwod_h_bu_b, "V16sV32UcV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvaddwod_w_hu_h, "V8SiV16UsV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvaddwod_d_wu_w, "V4LLiV8UiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvaddwod_q_du_d, "V4LLiV4ULLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvavg_b, "V32ScV32ScV32Sc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvavg_h, "V16SsV16SsV16Ss", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvavg_w, "V8SiV8SiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvavg_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvavg_bu, "V32UcV32UcV32Uc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvavg_hu, "V16UsV16UsV16Us", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvavg_wu, "V8UiV8UiV8Ui", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvavg_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvavgr_b, "V32ScV32ScV32Sc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvavgr_h, "V16SsV16SsV16Ss", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvavgr_w, "V8SiV8SiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvavgr_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvavgr_bu, "V32UcV32UcV32Uc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvavgr_hu, "V16UsV16UsV16Us", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvavgr_wu, "V8UiV8UiV8Ui", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvavgr_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvabsd_b, "V32ScV32ScV32Sc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvabsd_h, "V16SsV16SsV16Ss", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvabsd_w, "V8SiV8SiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvabsd_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvabsd_bu, "V32UcV32UcV32Uc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvabsd_hu, "V16UsV16UsV16Us", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvabsd_wu, "V8UiV8UiV8Ui", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvabsd_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvadda_b, "V32ScV32ScV32Sc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvadda_h, "V16SsV16SsV16Ss", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvadda_w, "V8SiV8SiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvadda_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvmax_b, "V32ScV32ScV32Sc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmax_h, "V16SsV16SsV16Ss", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmax_w, "V8SiV8SiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmax_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvmaxi_b, "V32ScV32ScIi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmaxi_h, "V16SsV16SsIi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmaxi_w, "V8SiV8SiIi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmaxi_d, "V4SLLiV4SLLiIi", "nc", "lasx") + 
+TARGET_BUILTIN(__builtin_lasx_xvmax_bu, "V32UcV32UcV32Uc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmax_hu, "V16UsV16UsV16Us", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmax_wu, "V8UiV8UiV8Ui", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmax_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvmaxi_bu, "V32UcV32UcIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmaxi_hu, "V16UsV16UsIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmaxi_wu, "V8UiV8UiIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmaxi_du, "V4ULLiV4ULLiIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvmin_b, "V32ScV32ScV32Sc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmin_h, "V16SsV16SsV16Ss", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmin_w, "V8SiV8SiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmin_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvmini_b, "V32ScV32ScIi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmini_h, "V16SsV16SsIi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmini_w, "V8SiV8SiIi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmini_d, "V4SLLiV4SLLiIi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvmin_bu, "V32UcV32UcV32Uc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmin_hu, "V16UsV16UsV16Us", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmin_wu, "V8UiV8UiV8Ui", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmin_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvmini_bu, "V32UcV32UcIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmini_hu, "V16UsV16UsIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmini_wu, "V8UiV8UiIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmini_du, "V4ULLiV4ULLiIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvmul_b, "V32ScV32ScV32Sc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmul_h, "V16SsV16SsV16Ss", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmul_w, "V8SiV8SiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmul_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvmuh_b, "V32cV32cV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmuh_h, "V16sV16sV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmuh_w, "V8iV8iV8i", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmuh_d, "V4LLiV4LLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvmuh_bu, "V32UcV32UcV32Uc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmuh_hu, "V16UsV16UsV16Us", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmuh_wu, "V8UiV8UiV8Ui", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmuh_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvmulwev_h_b, "V16sV32cV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmulwev_w_h, "V8SiV16sV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmulwev_d_w, "V4LLiV8SiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmulwev_q_d, "V4LLiV4LLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvmulwod_h_b, "V16sV32cV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmulwod_w_h, "V8SiV16sV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmulwod_d_w, "V4LLiV8SiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmulwod_q_d, "V4LLiV4LLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvmulwev_h_bu, "V16sV32UcV32Uc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmulwev_w_hu, "V8SiV16UsV16Us", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmulwev_d_wu, "V4LLiV8UiV8Ui", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmulwev_q_du, "V4LLiV4ULLiV4ULLi", 
"nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvmulwod_h_bu, "V16sV32UcV32Uc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmulwod_w_hu, "V8SiV16UsV16Us", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmulwod_d_wu, "V4LLiV8UiV8Ui", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmulwod_q_du, "V4LLiV4ULLiV4ULLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvmulwev_h_bu_b, "V16sV32UcV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmulwev_w_hu_h, "V8SiV16UsV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmulwev_d_wu_w, "V4LLiV8UiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmulwev_q_du_d, "V4LLiV4ULLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvmulwod_h_bu_b, "V16sV32UcV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmulwod_w_hu_h, "V8SiV16UsV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmulwod_d_wu_w, "V4LLiV8UiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmulwod_q_du_d, "V4LLiV4ULLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvmadd_b, "V32ScV32ScV32ScV32Sc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmadd_h, "V16SsV16SsV16SsV16Ss", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmadd_w, "V8SiV8SiV8SiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmadd_d, "V4SLLiV4SLLiV4SLLiV4SLLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvmsub_b, "V32ScV32ScV32ScV32Sc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmsub_h, "V16SsV16SsV16SsV16Ss", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmsub_w, "V8SiV8SiV8SiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmsub_d, "V4SLLiV4SLLiV4SLLiV4SLLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvmaddwev_h_b, "V16sV16sV32cV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmaddwev_w_h, "V8SiV8SiV16sV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmaddwev_d_w, "V4LLiV4LLiV8SiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmaddwev_q_d, "V4LLiV4LLiV4LLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvmaddwod_h_b, "V16sV16sV32cV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmaddwod_w_h, "V8SiV8SiV16sV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmaddwod_d_w, "V4LLiV4LLiV8SiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmaddwod_q_d, "V4LLiV4LLiV4LLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvmaddwev_h_bu, "V16UsV16UsV32UcV32Uc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmaddwev_w_hu, "V8UiV8UiV16UsV16Us", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmaddwev_d_wu, "V4ULLiV4ULLiV8UiV8Ui", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmaddwev_q_du, "V4ULLiV4ULLiV4ULLiV4ULLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvmaddwod_h_bu, "V16UsV16UsV32UcV32Uc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmaddwod_w_hu, "V8UiV8UiV16UsV16Us", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmaddwod_d_wu, "V4ULLiV4ULLiV8UiV8Ui", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmaddwod_q_du, "V4ULLiV4ULLiV4ULLiV4ULLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvmaddwev_h_bu_b, "V16sV16sV32UcV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmaddwev_w_hu_h, "V8SiV8SiV16UsV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmaddwev_d_wu_w, "V4LLiV4LLiV8UiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmaddwev_q_du_d, "V4LLiV4LLiV4ULLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvmaddwod_h_bu_b, "V16sV16sV32UcV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmaddwod_w_hu_h, "V8SiV8SiV16UsV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmaddwod_d_wu_w, "V4LLiV4LLiV8UiV8Si", "nc", "lasx") 
+TARGET_BUILTIN(__builtin_lasx_xvmaddwod_q_du_d, "V4LLiV4LLiV4ULLiV4LLi", "nc", "lasx") + + +TARGET_BUILTIN(__builtin_lasx_xvdiv_b, "V32ScV32ScV32Sc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvdiv_h, "V16SsV16SsV16Ss", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvdiv_w, "V8SiV8SiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvdiv_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvdiv_bu, "V32UcV32UcV32Uc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvdiv_hu, "V16UsV16UsV16Us", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvdiv_wu, "V8UiV8UiV8Ui", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvdiv_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvmod_b, "V32ScV32ScV32Sc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmod_h, "V16SsV16SsV16Ss", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmod_w, "V8SiV8SiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmod_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvmod_bu, "V32UcV32UcV32Uc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmod_hu, "V16UsV16UsV16Us", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmod_wu, "V8UiV8UiV8Ui", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmod_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvsat_b, "V32ScV32ScIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsat_h, "V16SsV16SsIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsat_w, "V8SiV8SiIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsat_d, "V4SLLiV4SLLiIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvsat_bu, "V32UcV32UcIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsat_hu, "V16UsV16UsIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsat_wu, "V8UiV8UiIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsat_du, "V4ULLiV4ULLiIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvexth_h_b, "V16sV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvexth_w_h, "V8SiV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvexth_d_w, "V4LLiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvexth_q_d, "V4LLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvexth_hu_bu, "V16UsV32Uc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvexth_wu_hu, "V8UiV16Us", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvexth_du_wu, "V4ULLiV8Ui", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvexth_qu_du, "V4ULLiV4ULLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_vext2xv_h_b, "V16sV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_vext2xv_w_b, "V8SiV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_vext2xv_d_b, "V4LLiV32c", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_vext2xv_w_h, "V8SiV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_vext2xv_d_h, "V4LLiV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_vext2xv_d_w, "V4LLiV8Si", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_vext2xv_hu_bu, "V16sV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_vext2xv_wu_bu, "V8SiV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_vext2xv_du_bu, "V4LLiV32c", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_vext2xv_wu_hu, "V8SiV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_vext2xv_du_hu, "V4LLiV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_vext2xv_du_wu, "V4LLiV8Si", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvsigncov_b, "V32ScV32ScV32Sc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsigncov_h, "V16SsV16SsV16Ss", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsigncov_w, "V8SiV8SiV8Si", "nc", "lasx") 
+TARGET_BUILTIN(__builtin_lasx_xvsigncov_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvmskltz_b, "V32cV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmskltz_h, "V16sV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmskltz_w, "V8iV8i", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmskltz_d, "V4LLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvmskgez_b, "V32cV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvmsknz_b, "V16sV16s", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvldi, "V4LLiIi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvrepli_b, "V32cIi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvrepli_h, "V16sIi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvrepli_w, "V8iIi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvrepli_d, "V4LLiIi", "nc", "lasx") + + +TARGET_BUILTIN(__builtin_lasx_xvand_v, "V32UcV32UcV32Uc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvor_v, "V32UcV32UcV32Uc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvxor_v, "V32cV32cV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvnor_v, "V32UcV32UcV32Uc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvandn_v, "V32UcV32UcV32Uc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvorn_v, "V32ScV32ScV32Sc", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvandi_b, "V32UcV32UcIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvori_b, "V32UcV32UcIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvxori_b, "V32UcV32UcIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvnori_b, "V32UcV32UcIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvsll_b, "V32cV32cV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsll_h, "V16sV16sV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsll_w, "V8iV8iV8i", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsll_d, "V4LLiV4LLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvslli_b, "V32cV32cIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvslli_h, "V16sV16sIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvslli_w, "V8iV8iIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvslli_d, "V4LLiV4LLiIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvsrl_b, "V32cV32cV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsrl_h, "V16sV16sV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsrl_w, "V8iV8iV8i", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsrl_d, "V4LLiV4LLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvsrli_b, "V32cV32cIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsrli_h, "V16sV16sIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsrli_w, "V8iV8iIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsrli_d, "V4LLiV4LLiIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvsra_b, "V32cV32cV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsra_h, "V16sV16sV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsra_w, "V8iV8iV8i", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsra_d, "V4LLiV4LLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvsrai_b, "V32cV32cIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsrai_h, "V16sV16sIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsrai_w, "V8iV8iIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsrai_d, "V4LLiV4LLiIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvrotr_b, "V32cV32cV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvrotr_h, "V16sV16sV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvrotr_w, "V8iV8iV8i", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvrotr_d, "V4LLiV4LLiV4LLi", "nc", "lasx") + 
+TARGET_BUILTIN(__builtin_lasx_xvrotri_b, "V32cV32cIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvrotri_h, "V16sV16sIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvrotri_w, "V8iV8iIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvrotri_d, "V4LLiV4LLiIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvsllwil_h_b, "V16sV32cIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsllwil_w_h, "V8SiV16sIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsllwil_d_w, "V4LLiV8SiIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvextl_q_d, "V4LLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvsllwil_hu_bu, "V16UsV32UcIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsllwil_wu_hu, "V8UiV16UsIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsllwil_du_wu, "V4ULLiV8UiIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvextl_qu_du, "V4LLiV4ULLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvsrlr_b, "V32cV32cV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsrlr_h, "V16sV16sV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsrlr_w, "V8iV8iV8i", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsrlr_d, "V4LLiV4LLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvsrlri_b, "V32cV32cIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsrlri_h, "V16sV16sIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsrlri_w, "V8iV8iIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsrlri_d, "V4LLiV4LLiIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvsrar_b, "V32cV32cV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsrar_h, "V16sV16sV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsrar_w, "V8iV8iV8i", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsrar_d, "V4LLiV4LLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvsrari_b, "V32cV32cIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsrari_h, "V16sV16sIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsrari_w, "V8iV8iIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsrari_d, "V4LLiV4LLiIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvsrln_b_h, "V32ScV16sV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsrln_h_w, "V16sV8SiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsrln_w_d, "V8SiV4LLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvsran_b_h, "V32ScV16sV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsran_h_w, "V16sV8SiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsran_w_d, "V8SiV4LLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvsrlni_b_h, "V32cV32cV32cIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsrlni_h_w, "V16sV16sV16sIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsrlni_w_d, "V8iV8iV8iIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsrlni_d_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvsrani_b_h, "V32cV32cV32cIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsrani_h_w, "V16sV16sV16sIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsrani_w_d, "V8iV8iV8iIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsrani_d_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvsrlrn_b_h, "V32ScV16sV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsrlrn_h_w, "V16sV8SiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsrlrn_w_d, "V8SiV4LLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvsrarn_b_h, "V32ScV16sV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsrarn_h_w, "V16sV8SiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsrarn_w_d, "V8SiV4LLiV4LLi", "nc", 
"lasx") + +TARGET_BUILTIN(__builtin_lasx_xvsrlrni_b_h, "V32cV32cV32cIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsrlrni_h_w, "V16sV16sV16sIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsrlrni_w_d, "V8iV8iV8iIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsrlrni_d_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvsrarni_b_h, "V32cV32cV32cIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsrarni_h_w, "V16sV16sV16sIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsrarni_w_d, "V8iV8iV8iIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsrarni_d_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvssrln_b_h, "V32ScV16sV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrln_h_w, "V16sV8SiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrln_w_d, "V8SiV4LLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvssran_b_h, "V32ScV16sV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssran_h_w, "V16sV8SiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssran_w_d, "V8SiV4LLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvssrln_bu_h, "V32UcV16UsV16Us", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrln_hu_w, "V16UsV8UiV8Ui", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrln_wu_d, "V8UiV4ULLiV4ULLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvssran_bu_h, "V32UcV16UsV16Us", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssran_hu_w, "V16UsV8UiV8Ui", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssran_wu_d, "V8UiV4ULLiV4ULLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvssrlni_b_h, "V32cV32cV32cIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrlni_h_w, "V16sV16sV16sIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrlni_w_d, "V8iV8iV8iIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrlni_d_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvssrani_b_h, "V32cV32cV32cIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrani_h_w, "V16sV16sV16sIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrani_w_d, "V8iV8iV8iIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrani_d_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvssrlrni_bu_h, "V32cV32cV32cIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrlrni_hu_w, "V16sV16sV16sIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrlrni_wu_d, "V8iV8iV8iIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrlrni_du_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvssrani_bu_h, "V32cV32cV32cIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrani_hu_w, "V16sV16sV16sIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrani_wu_d, "V8iV8iV8iIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrani_du_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvssrlrn_b_h, "V32ScV16sV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrlrn_h_w, "V16sV8SiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrlrn_w_d, "V8SiV4LLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvssrarn_b_h, "V32ScV16sV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrarn_h_w, "V16sV8SiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrarn_w_d, "V8SiV4LLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvssrlrn_bu_h, "V32UcV16UsV16Us", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrlrn_hu_w, "V16UsV8UiV8Ui", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrlrn_wu_d, "V8UiV4ULLiV4ULLi", "nc", "lasx") + 
+TARGET_BUILTIN(__builtin_lasx_xvssrarn_bu_h, "V32UcV16UsV16Us", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrarn_hu_w, "V16UsV8UiV8Ui", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrarn_wu_d, "V8UiV4ULLiV4ULLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvssrlrni_b_h, "V32cV32cV32cIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrlrni_h_w, "V16sV16sV16sIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrlrni_w_d, "V8iV8iV8iIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrlrni_d_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvssrarni_b_h, "V32cV32cV32cIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrarni_h_w, "V16sV16sV16sIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrarni_w_d, "V8iV8iV8iIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrarni_d_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvssrlni_bu_h, "V32cV32cV32cIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrlni_hu_w, "V16sV16sV16sIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrlni_wu_d, "V8iV8iV8iIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrlni_du_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvssrarni_bu_h, "V32cV32cV32cIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrarni_hu_w, "V16sV16sV16sIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrarni_wu_d, "V8iV8iV8iIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvssrarni_du_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvclo_b, "V32ScV32Sc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvclo_h, "V16SsV16Ss", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvclo_w, "V8SiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvclo_d, "V4SLLiV4SLLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvclz_b, "V32ScV32Sc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvclz_h, "V16SsV16Ss", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvclz_w, "V8SiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvclz_d, "V4SLLiV4SLLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvpcnt_b, "V32ScV32Sc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvpcnt_h, "V16SsV16Ss", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvpcnt_w, "V8SiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvpcnt_d, "V4SLLiV4SLLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvbitclr_b, "V32UcV32UcV32Uc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvbitclr_h, "V16UsV16UsV16Us", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvbitclr_w, "V8UiV8UiV8Ui", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvbitclr_d, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvbitclri_b, "V32UcV32UcIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvbitclri_h, "V16UsV16UsIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvbitclri_w, "V8UiV8UiIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvbitclri_d, "V4ULLiV4ULLiIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvbitset_b, "V32UcV32UcV32Uc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvbitset_h, "V16UsV16UsV16Us", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvbitset_w, "V8UiV8UiV8Ui", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvbitset_d, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvbitseti_b, "V32UcV32UcIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvbitseti_h, "V16UsV16UsIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvbitseti_w, "V8UiV8UiIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvbitseti_d, "V4ULLiV4ULLiIUi", "nc", "lasx") 
+ +TARGET_BUILTIN(__builtin_lasx_xvbitrev_b, "V32UcV32UcV32Uc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvbitrev_h, "V16UsV16UsV16Us", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvbitrev_w, "V8UiV8UiV8Ui", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvbitrev_d, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvbitrevi_b, "V32UcV32UcIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvbitrevi_h, "V16UsV16UsIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvbitrevi_w, "V8UiV8UiIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvbitrevi_d, "V4ULLiV4ULLiIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfrstp_b, "V32ScV32ScV32ScV32Sc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfrstp_h, "V16SsV16SsV16SsV16Ss", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfrstpi_b, "V32cV32cV32cIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfrstpi_h, "V16sV16sV16sIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfadd_s, "V8fV8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfadd_d, "V4dV4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfsub_s, "V8fV8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfsub_d, "V4dV4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfmul_s, "V8fV8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfmul_d, "V4dV4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfdiv_s, "V8fV8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfdiv_d, "V4dV4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfmadd_s, "V8fV8fV8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfmadd_d, "V4dV4dV4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfmsub_s, "V8fV8fV8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfmsub_d, "V4dV4dV4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfnmadd_s, "V8fV8fV8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfnmadd_d, "V4dV4dV4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfnmsub_s, "V8fV8fV8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfnmsub_d, "V4dV4dV4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfmax_s, "V8fV8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfmax_d, "V4dV4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfmin_s, "V8fV8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfmin_d, "V4dV4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfmaxa_s, "V8fV8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfmaxa_d, "V4dV4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfmina_s, "V8fV8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfmina_d, "V4dV4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvflogb_s, "V8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvflogb_d, "V4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfclass_s, "V8iV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfclass_d, "V4LLiV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfsqrt_s, "V8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfsqrt_d, "V4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfrecip_s, "V8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfrecip_d, "V4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfrsqrt_s, "V8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfrsqrt_d, "V4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfcvtl_s_h, "V8fV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfcvth_s_h, "V8fV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfcvtl_d_s, "V4dV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfcvth_d_s, 
"V4dV8f", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfcvt_h_s, "V16sV8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfcvt_s_d, "V8fV4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfrintrne_s, "V8SiV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfrintrne_d, "V4LLiV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfrintrz_s, "V8SiV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfrintrz_d, "V4LLiV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfrintrp_s, "V8SiV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfrintrp_d, "V4LLiV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfrintrm_s, "V8SiV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfrintrm_d, "V4LLiV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfrint_s, "V8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfrint_d, "V4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvftintrne_w_s, "V8SiV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvftintrne_l_d, "V4LLiV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvftintrz_w_s, "V8SiV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvftintrz_l_d, "V4LLiV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvftintrp_w_s, "V8SiV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvftintrp_l_d, "V4LLiV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvftintrm_w_s, "V8SiV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvftintrm_l_d, "V4LLiV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvftint_w_s, "V8SiV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvftint_l_d, "V4SLLiV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvftintrz_wu_s, "V8UiV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvftintrz_lu_d, "V4ULLiV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvftint_wu_s, "V8UiV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvftint_lu_d, "V4ULLiV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvftintrne_w_d, "V8SiV4dV4d", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvftintrz_w_d, "V8SiV4dV4d", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvftintrp_w_d, "V8SiV4dV4d", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvftintrm_w_d, "V8SiV4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvftint_w_d, "V8SiV4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvftintrnel_l_s, "V4LLiV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvftintrneh_l_s, "V4LLiV8f", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvftintrzl_l_s, "V4LLiV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvftintrzh_l_s, "V4LLiV8f", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvftintrpl_l_s, "V4LLiV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvftintrph_l_s, "V4LLiV8f", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvftintrml_l_s, "V4LLiV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvftintrmh_l_s, "V4LLiV8f", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvftintl_l_s, "V4LLiV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvftinth_l_s, "V4LLiV8f", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvffint_s_w, "V8fV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvffint_d_l, "V4dV4SLLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvffint_s_wu, "V8fV8Ui", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvffint_d_lu, "V4dV4ULLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvffintl_d_w, "V4dV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvffinth_d_w, "V4dV8Si", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvffint_s_l, "V8fV4LLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvseq_b, 
"V32ScV32ScV32Sc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvseq_h, "V16SsV16SsV16Ss", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvseq_w, "V8SiV8SiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvseq_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvseqi_b, "V32ScV32ScISi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvseqi_h, "V16SsV16SsISi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvseqi_w, "V8SiV8SiISi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvseqi_d, "V4SLLiV4SLLiISi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvsle_b, "V32ScV32ScV32Sc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsle_h, "V16SsV16SsV16Ss", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsle_w, "V8SiV8SiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsle_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvslei_b, "V32ScV32ScISi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvslei_h, "V16SsV16SsISi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvslei_w, "V8SiV8SiISi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvslei_d, "V4SLLiV4SLLiISi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvsle_bu, "V32ScV32UcV32Uc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsle_hu, "V16SsV16UsV16Us", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsle_wu, "V8SiV8UiV8Ui", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvsle_du, "V4SLLiV4ULLiV4ULLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvslei_bu, "V32ScV32UcIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvslei_hu, "V16SsV16UsIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvslei_wu, "V8SiV8UiIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvslei_du, "V4SLLiV4ULLiIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvslt_b, "V32ScV32ScV32Sc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvslt_h, "V16SsV16SsV16Ss", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvslt_w, "V8SiV8SiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvslt_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvslti_b, "V32ScV32ScISi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvslti_h, "V16SsV16SsISi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvslti_w, "V8SiV8SiISi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvslti_d, "V4SLLiV4SLLiISi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvslt_bu, "V32ScV32UcV32Uc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvslt_hu, "V16SsV16UsV16Us", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvslt_wu, "V8SiV8UiV8Ui", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvslt_du, "V4SLLiV4ULLiV4ULLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvslti_bu, "V32ScV32UcIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvslti_hu, "V16SsV16UsIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvslti_wu, "V8SiV8UiIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvslti_du, "V4SLLiV4ULLiIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfcmp_caf_s, "V8SiV8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfcmp_caf_d, "V4SLLiV4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfcmp_cun_s, "V8SiV8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfcmp_cun_d, "V4SLLiV4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfcmp_ceq_s, "V8SiV8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfcmp_ceq_d, "V4SLLiV4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfcmp_cueq_s, "V8SiV8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfcmp_cueq_d, "V4SLLiV4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfcmp_clt_s, 
"V8SiV8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfcmp_clt_d, "V4SLLiV4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfcmp_cult_s, "V8SiV8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfcmp_cult_d, "V4SLLiV4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfcmp_cle_s, "V8SiV8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfcmp_cle_d, "V4SLLiV4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfcmp_cule_s, "V8SiV8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfcmp_cule_d, "V4SLLiV4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfcmp_cne_s, "V8SiV8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfcmp_cne_d, "V4SLLiV4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfcmp_cor_s, "V8SiV8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfcmp_cor_d, "V4SLLiV4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfcmp_cune_s, "V8SiV8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfcmp_cune_d, "V4SLLiV4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfcmp_saf_s, "V8SiV8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfcmp_saf_d, "V4SLLiV4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfcmp_sun_s, "V8SiV8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfcmp_sun_d, "V4SLLiV4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfcmp_seq_s, "V8SiV8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfcmp_seq_d, "V4SLLiV4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfcmp_sueq_s, "V8SiV8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfcmp_sueq_d, "V4SLLiV4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfcmp_slt_s, "V8SiV8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfcmp_slt_d, "V4SLLiV4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfcmp_sult_s, "V8SiV8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfcmp_sult_d, "V4SLLiV4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfcmp_sle_s, "V8SiV8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfcmp_sle_d, "V4SLLiV4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfcmp_sule_s, "V8SiV8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfcmp_sule_d, "V4SLLiV4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfcmp_sne_s, "V8SiV8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfcmp_sne_d, "V4SLLiV4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfcmp_sor_s, "V8SiV8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfcmp_sor_d, "V4SLLiV4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvfcmp_sune_s, "V8SiV8fV8f", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvfcmp_sune_d, "V4SLLiV4dV4d", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvbitsel_v, "V32UcV32UcV32UcV32Uc", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvbitseli_b, "V32UcV32UcV32UcIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvreplgr2vr_b, "V32Sci", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvreplgr2vr_h, "V16Ssi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvreplgr2vr_w, "V8Sii", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvreplgr2vr_d, "V4SLLiLLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvinsgr2vr_w, "V8SiV8SiiIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvinsgr2vr_d, "V4SLLiV4SLLiLLiIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvpickve2gr_w, "iV8SiIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvpickve2gr_d, "LLiV4SLLiIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvpickve2gr_wu, "iV8UiIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvpickve2gr_du, 
"LLiV4ULLiIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvreplve_b, "V32cV32cUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvreplve_h, "V16sV16sUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvreplve_w, "V8iV8iUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvreplve_d, "V4LLiV4LLiUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvrepl128vei_b, "V32cV32cIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvrepl128vei_h, "V16sV16sIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvrepl128vei_w, "V8iV8iIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvrepl128vei_d, "V4LLiV4LLiIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvreplve0_b, "V32ScV32Sc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvreplve0_h, "V16SsV16Ss", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvreplve0_w, "V8SiV8Si", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvreplve0_d, "V4SLLiV4SLLi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvreplve0_q, "V32ScV32Sc", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvinsve0_w, "V8iV8iV8iIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvinsve0_d, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvpickve_w, "V8iV8iIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvpickve_d, "V4LLiV4LLiIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvpickve_w_f, "V8fV8fIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvpickve_d_f, "V4dV4dIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvbsll_v, "V32cV32cIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvbsrl_v, "V32cV32cIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvpackev_b, "V32cV32cV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvpackev_h, "V16sV16sV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvpackev_w, "V8iV8iV8i", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvpackev_d, "V4LLiV4LLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvpackod_b, "V32cV32cV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvpackod_h, "V16sV16sV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvpackod_w, "V8iV8iV8i", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvpackod_d, "V4LLiV4LLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvpickev_b, "V32cV32cV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvpickev_h, "V16sV16sV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvpickev_w, "V8iV8iV8i", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvpickev_d, "V4LLiV4LLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvpickod_b, "V32cV32cV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvpickod_h, "V16sV16sV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvpickod_w, "V8iV8iV8i", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvpickod_d, "V4LLiV4LLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvilvl_b, "V32cV32cV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvilvl_h, "V16sV16sV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvilvl_w, "V8iV8iV8i", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvilvl_d, "V4LLiV4LLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvilvh_b, "V32cV32cV32c", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvilvh_h, "V16sV16sV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvilvh_w, "V8iV8iV8i", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvilvh_d, "V4LLiV4LLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvshuf_b, "V32UcV32UcV32UcV32Uc", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvshuf_h, "V16sV16sV16sV16s", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvshuf_w, "V8iV8iV8iV8i", "nc", "lasx") 
+TARGET_BUILTIN(__builtin_lasx_xvshuf_d, "V4LLiV4LLiV4LLiV4LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvperm_w, "V8iV8iV8i", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvshuf4i_b, "V32cV32cIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvshuf4i_h, "V16sV16sIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvshuf4i_w, "V8iV8iIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvshuf4i_d, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvpermi_w, "V8iV8iV8iIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvpermi_d, "V4LLiV4LLiIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvpermi_q, "V32cV32cV32cIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvextrins_b, "V32cV32cV32cIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvextrins_h, "V16sV16sV16sIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvextrins_w, "V8iV8iV8iIUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvextrins_d, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvld, "V32ScvC*Ii", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvst, "vV32Scv*Ii", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvldx, "V32ScvC*LLi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvstx, "vV32Scv*LLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvldrepl_b, "V32cvC*Ii", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvldrepl_h, "V16svC*Ii", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvldrepl_w, "V8ivC*Ii", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvldrepl_d, "V4LLivC*Ii", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xvstelm_b, "vV32Scv*IiUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvstelm_h, "vV16Ssv*IiUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvstelm_w, "vV8Siv*IiUi", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xvstelm_d, "vV4SLLiv*IiUi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xbz_v, "iV32Uc", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xbnz_v, "iV32Uc", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xbz_b, "iV32Uc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xbz_h, "iV16Us", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xbz_w, "iV8Ui", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xbz_d, "iV4ULLi", "nc", "lasx") + +TARGET_BUILTIN(__builtin_lasx_xbnz_b, "iV32Uc", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xbnz_h, "iV16Us", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xbnz_w, "iV8Ui", "nc", "lasx") +TARGET_BUILTIN(__builtin_lasx_xbnz_d, "iV4ULLi", "nc", "lasx") diff --git a/clang/include/clang/Basic/BuiltinsLoongArchLSX.def b/clang/include/clang/Basic/BuiltinsLoongArchLSX.def new file mode 100644 index 0000000000000000000000000000000000000000..8e6aec886c50cd912d5993809cd9bf26b1f92da6 --- /dev/null +++ b/clang/include/clang/Basic/BuiltinsLoongArchLSX.def @@ -0,0 +1,953 @@ +//=============------------- BuiltinsLoongArchLSX.def --------------- C++ -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the LoongArch-specific LSX builtin function database. +// Users of this file must define the BUILTIN macro to make use of this +// information. 
+// +//===----------------------------------------------------------------------===// + +TARGET_BUILTIN(__builtin_lsx_vadd_b, "V16cV16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vadd_h, "V8sV8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vadd_w, "V4iV4iV4i", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vadd_d, "V2LLiV2LLiV2LLi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vadd_q, "V2LLiV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vsub_b, "V16cV16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsub_h, "V8sV8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsub_w, "V4iV4iV4i", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsub_d, "V2LLiV2LLiV2LLi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsub_q, "V2LLiV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vaddi_bu, "V16cV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vaddi_hu, "V8sV8sIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vaddi_wu, "V4iV4iIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vaddi_du, "V2LLiV2LLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vsubi_bu, "V16cV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsubi_hu, "V8sV8sIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsubi_wu, "V4iV4iIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsubi_du, "V2LLiV2LLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vneg_b, "V16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vneg_h, "V8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vneg_w, "V4iV4i", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vneg_d, "V2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vsadd_b, "V16ScV16ScV16Sc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsadd_h, "V8SsV8SsV8Ss", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsadd_w, "V4SiV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsadd_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vsadd_bu, "V16UcV16UcV16Uc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsadd_hu, "V8UsV8UsV8Us", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsadd_wu, "V4UiV4UiV4Ui", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsadd_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vssub_b, "V16ScV16ScV16Sc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssub_h, "V8SsV8SsV8Ss", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssub_w, "V4SiV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssub_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vssub_bu, "V16UcV16UcV16Uc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssub_hu, "V8UsV8UsV8Us", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssub_wu, "V4UiV4UiV4Ui", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssub_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vhaddw_h_b, "V8SsV16ScV16Sc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vhaddw_w_h, "V4SiV8SsV8Ss", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vhaddw_d_w, "V2SLLiV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vhaddw_q_d, "V2LLiV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vhaddw_hu_bu, "V8UsV16UcV16Uc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vhaddw_wu_hu, "V4UiV8UsV8Us", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vhaddw_du_wu, "V2ULLiV4UiV4Ui", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vhaddw_qu_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vhsubw_h_b, "V8SsV16ScV16Sc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vhsubw_w_h, "V4SiV8SsV8Ss", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vhsubw_d_w, "V2SLLiV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vhsubw_q_d, 
"V2LLiV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vhsubw_hu_bu, "V8UsV16UcV16Uc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vhsubw_wu_hu, "V4UiV8UsV8Us", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vhsubw_du_wu, "V2ULLiV4UiV4Ui", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vhsubw_qu_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vaddwev_h_b, "V8sV16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vaddwev_w_h, "V4SiV8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vaddwev_d_w, "V2LLiV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vaddwev_q_d, "V2LLiV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vaddwod_h_b, "V8sV16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vaddwod_w_h, "V4SiV8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vaddwod_d_w, "V2LLiV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vaddwod_q_d, "V2LLiV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vsubwev_h_b, "V8sV16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsubwev_w_h, "V4SiV8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsubwev_d_w, "V2LLiV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsubwev_q_d, "V2LLiV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vsubwod_h_b, "V8sV16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsubwod_w_h, "V4SiV8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsubwod_d_w, "V2LLiV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsubwod_q_d, "V2LLiV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vaddwev_h_bu, "V8sV16UcV16Uc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vaddwev_w_hu, "V4SiV8UsV8Us", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vaddwev_d_wu, "V2LLiV4UiV4Ui", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vaddwev_q_du, "V2LLiV2ULLiV2ULLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vaddwod_h_bu, "V8sV16UcV16Uc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vaddwod_w_hu, "V4SiV8UsV8Us", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vaddwod_d_wu, "V2LLiV4UiV4Ui", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vaddwod_q_du, "V2LLiV2ULLiV2ULLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vsubwev_h_bu, "V8sV16UcV16Uc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsubwev_w_hu, "V4SiV8UsV8Us", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsubwev_d_wu, "V2LLiV4UiV4Ui", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsubwev_q_du, "V2LLiV2ULLiV2ULLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vsubwod_h_bu, "V8sV16UcV16Uc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsubwod_w_hu, "V4SiV8UsV8Us", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsubwod_d_wu, "V2LLiV4UiV4Ui", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsubwod_q_du, "V2LLiV2ULLiV2ULLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vaddwev_h_bu_b, "V8sV16UcV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vaddwev_w_hu_h, "V4SiV8UsV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vaddwev_d_wu_w, "V2LLiV4UiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vaddwev_q_du_d, "V2LLiV2ULLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vaddwod_h_bu_b, "V8sV16UcV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vaddwod_w_hu_h, "V4SiV8UsV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vaddwod_d_wu_w, "V2LLiV4UiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vaddwod_q_du_d, "V2LLiV2ULLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vavg_b, "V16ScV16ScV16Sc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vavg_h, "V8SsV8SsV8Ss", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vavg_w, "V4SiV4SiV4Si", "nc", "lsx") 
+TARGET_BUILTIN(__builtin_lsx_vavg_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vavg_bu, "V16UcV16UcV16Uc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vavg_hu, "V8UsV8UsV8Us", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vavg_wu, "V4UiV4UiV4Ui", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vavg_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vavgr_b, "V16ScV16ScV16Sc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vavgr_h, "V8SsV8SsV8Ss", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vavgr_w, "V4SiV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vavgr_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vavgr_bu, "V16UcV16UcV16Uc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vavgr_hu, "V8UsV8UsV8Us", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vavgr_wu, "V4UiV4UiV4Ui", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vavgr_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vabsd_b, "V16ScV16ScV16Sc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vabsd_h, "V8SsV8SsV8Ss", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vabsd_w, "V4SiV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vabsd_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vabsd_bu, "V16UcV16UcV16Uc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vabsd_hu, "V8UsV8UsV8Us", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vabsd_wu, "V4UiV4UiV4Ui", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vabsd_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vadda_b, "V16ScV16ScV16Sc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vadda_h, "V8SsV8SsV8Ss", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vadda_w, "V4SiV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vadda_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vmax_b, "V16ScV16ScV16Sc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmax_h, "V8SsV8SsV8Ss", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmax_w, "V4SiV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmax_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vmaxi_b, "V16ScV16ScIi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmaxi_h, "V8SsV8SsIi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmaxi_w, "V4SiV4SiIi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmaxi_d, "V2SLLiV2SLLiIi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vmax_bu, "V16UcV16UcV16Uc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmax_hu, "V8UsV8UsV8Us", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmax_wu, "V4UiV4UiV4Ui", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmax_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vmaxi_bu, "V16UcV16UcIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmaxi_hu, "V8UsV8UsIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmaxi_wu, "V4UiV4UiIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmaxi_du, "V2ULLiV2ULLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vmin_b, "V16ScV16ScV16Sc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmin_h, "V8SsV8SsV8Ss", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmin_w, "V4SiV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmin_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vmini_b, "V16ScV16ScIi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmini_h, "V8SsV8SsIi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmini_w, "V4SiV4SiIi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmini_d, "V2SLLiV2SLLiIi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vmin_bu, "V16UcV16UcV16Uc", "nc", "lsx") 
+TARGET_BUILTIN(__builtin_lsx_vmin_hu, "V8UsV8UsV8Us", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmin_wu, "V4UiV4UiV4Ui", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmin_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vmini_bu, "V16UcV16UcIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmini_hu, "V8UsV8UsIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmini_wu, "V4UiV4UiIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmini_du, "V2ULLiV2ULLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vmul_b, "V16ScV16ScV16Sc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmul_h, "V8SsV8SsV8Ss", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmul_w, "V4SiV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmul_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vmuh_b, "V16cV16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmuh_h, "V8sV8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmuh_w, "V4iV4iV4i", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmuh_d, "V2LLiV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vmuh_bu, "V16UcV16UcV16Uc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmuh_hu, "V8UsV8UsV8Us", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmuh_wu, "V4UiV4UiV4Ui", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmuh_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vmulwev_h_b, "V8sV16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmulwev_w_h, "V4SiV8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmulwev_d_w, "V2LLiV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmulwev_q_d, "V2LLiV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vmulwod_h_b, "V8sV16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmulwod_w_h, "V4SiV8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmulwod_d_w, "V2LLiV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmulwod_q_d, "V2LLiV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vmulwev_h_bu, "V8sV16UcV16Uc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmulwev_w_hu, "V4SiV8UsV8Us", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmulwev_d_wu, "V2LLiV4UiV4Ui", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmulwev_q_du, "V2LLiV2ULLiV2ULLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vmulwod_h_bu, "V8sV16UcV16Uc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmulwod_w_hu, "V4SiV8UsV8Us", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmulwod_d_wu, "V2LLiV4UiV4Ui", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmulwod_q_du, "V2LLiV2ULLiV2ULLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vmulwev_h_bu_b, "V8sV16UcV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmulwev_w_hu_h, "V4SiV8UsV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmulwev_d_wu_w, "V2LLiV4UiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmulwev_q_du_d, "V2LLiV2ULLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vmulwod_h_bu_b, "V8sV16UcV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmulwod_w_hu_h, "V4SiV8UsV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmulwod_d_wu_w, "V2LLiV4UiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmulwod_q_du_d, "V2LLiV2ULLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vmadd_b, "V16ScV16ScV16ScV16Sc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmadd_h, "V8SsV8SsV8SsV8Ss", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmadd_w, "V4SiV4SiV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmadd_d, "V2SLLiV2SLLiV2SLLiV2SLLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vmsub_b, "V16ScV16ScV16ScV16Sc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmsub_h, "V8SsV8SsV8SsV8Ss", 
"nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmsub_w, "V4SiV4SiV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmsub_d, "V2SLLiV2SLLiV2SLLiV2SLLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vmaddwev_h_b, "V8sV8sV16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmaddwev_w_h, "V4SiV4SiV8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmaddwev_d_w, "V2LLiV2LLiV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmaddwev_q_d, "V2LLiV2LLiV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vmaddwod_h_b, "V8sV8sV16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmaddwod_w_h, "V4SiV4SiV8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmaddwod_d_w, "V2LLiV2LLiV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmaddwod_q_d, "V2LLiV2LLiV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vmaddwev_h_bu, "V8UsV8UsV16UcV16Uc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmaddwev_w_hu, "V4UiV4UiV8UsV8Us", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmaddwev_d_wu, "V2ULLiV2ULLiV4UiV4Ui", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmaddwev_q_du, "V2ULLiV2ULLiV2ULLiV2ULLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vmaddwod_h_bu, "V8UsV8UsV16UcV16Uc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmaddwod_w_hu, "V4UiV4UiV8UsV8Us", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmaddwod_d_wu, "V2ULLiV2ULLiV4UiV4Ui", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmaddwod_q_du, "V2ULLiV2ULLiV2ULLiV2ULLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vmaddwev_h_bu_b, "V8sV8sV16UcV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmaddwev_w_hu_h, "V4SiV4SiV8UsV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmaddwev_d_wu_w, "V2LLiV2LLiV4UiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmaddwev_q_du_d, "V2LLiV2LLiV2ULLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vmaddwod_h_bu_b, "V8sV8sV16UcV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmaddwod_w_hu_h, "V4SiV4SiV8UsV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmaddwod_d_wu_w, "V2LLiV2LLiV4UiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmaddwod_q_du_d, "V2LLiV2LLiV2ULLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vdiv_b, "V16ScV16ScV16Sc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vdiv_h, "V8SsV8SsV8Ss", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vdiv_w, "V4SiV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vdiv_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vdiv_bu, "V16UcV16UcV16Uc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vdiv_hu, "V8UsV8UsV8Us", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vdiv_wu, "V4UiV4UiV4Ui", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vdiv_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vmod_b, "V16ScV16ScV16Sc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmod_h, "V8SsV8SsV8Ss", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmod_w, "V4SiV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmod_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") + + +TARGET_BUILTIN(__builtin_lsx_vmod_bu, "V16UcV16UcV16Uc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmod_hu, "V8UsV8UsV8Us", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmod_wu, "V4UiV4UiV4Ui", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmod_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vsat_b, "V16ScV16ScIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsat_h, "V8SsV8SsIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsat_w, "V4SiV4SiIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsat_d, "V2SLLiV2SLLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vsat_bu, "V16UcV16UcIUi", 
"nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsat_hu, "V8UsV8UsIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsat_wu, "V4UiV4UiIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsat_du, "V2ULLiV2ULLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vexth_h_b, "V8sV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vexth_w_h, "V4SiV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vexth_d_w, "V2LLiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vexth_q_d, "V2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vexth_hu_bu, "V8UsV16Uc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vexth_wu_hu, "V4UiV8Us", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vexth_du_wu, "V2ULLiV4Ui", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vexth_qu_du, "V2ULLiV2ULLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vsigncov_b, "V16ScV16ScV16Sc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsigncov_h, "V8SsV8SsV8Ss", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsigncov_w, "V4SiV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsigncov_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vmskltz_b, "V16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmskltz_h, "V8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmskltz_w, "V4iV4i", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmskltz_d, "V2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vmskgez_b, "V16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vmsknz_b, "V8sV8s", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vldi, "V2LLiIi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vrepli_b, "V16cIi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vrepli_h, "V8sIi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vrepli_w, "V4iIi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vrepli_d, "V2LLiIi", "nc", "lsx") + + +TARGET_BUILTIN(__builtin_lsx_vand_v, "V16UcV16UcV16Uc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vor_v, "V16UcV16UcV16Uc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vxor_v, "V16cV16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vnor_v, "V16UcV16UcV16Uc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vandn_v, "V16UcV16UcV16Uc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vorn_v, "V16ScV16ScV16Sc", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vandi_b, "V16UcV16UcIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vori_b, "V16UcV16UcIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vxori_b, "V16UcV16UcIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vnori_b, "V16UcV16UcIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vsll_b, "V16cV16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsll_h, "V8sV8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsll_w, "V4iV4iV4i", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsll_d, "V2LLiV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vslli_b, "V16cV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vslli_h, "V8sV8sIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vslli_w, "V4iV4iIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vslli_d, "V2LLiV2LLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vsrl_b, "V16cV16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrl_h, "V8sV8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrl_w, "V4iV4iV4i", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrl_d, "V2LLiV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vsrli_b, "V16cV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrli_h, "V8sV8sIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrli_w, "V4iV4iIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrli_d, "V2LLiV2LLiIUi", "nc", "lsx") + 
+TARGET_BUILTIN(__builtin_lsx_vsra_b, "V16cV16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsra_h, "V8sV8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsra_w, "V4iV4iV4i", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsra_d, "V2LLiV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vsrai_b, "V16cV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrai_h, "V8sV8sIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrai_w, "V4iV4iIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrai_d, "V2LLiV2LLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vrotr_b, "V16cV16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vrotr_h, "V8sV8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vrotr_w, "V4iV4iV4i", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vrotr_d, "V2LLiV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vrotri_b, "V16cV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vrotri_h, "V8sV8sIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vrotri_w, "V4iV4iIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vrotri_d, "V2LLiV2LLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vsllwil_h_b, "V8sV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsllwil_w_h, "V4SiV8sIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsllwil_d_w, "V2LLiV4SiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vextl_q_d, "V2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vsllwil_hu_bu, "V8UsV16UcIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsllwil_wu_hu, "V4UiV8UsIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsllwil_du_wu, "V2ULLiV4UiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vextl_qu_du, "V2LLiV2ULLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vsrlr_b, "V16cV16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrlr_h, "V8sV8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrlr_w, "V4iV4iV4i", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrlr_d, "V2LLiV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vsrlri_b, "V16cV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrlri_h, "V8sV8sIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrlri_w, "V4iV4iIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrlri_d, "V2LLiV2LLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vsrar_b, "V16cV16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrar_h, "V8sV8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrar_w, "V4iV4iV4i", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrar_d, "V2LLiV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vsrari_b, "V16cV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrari_h, "V8sV8sIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrari_w, "V4iV4iIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrari_d, "V2LLiV2LLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vsrln_b_h, "V16ScV8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrln_h_w, "V8sV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrln_w_d, "V4SiV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vsran_b_h, "V16ScV8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsran_h_w, "V8sV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsran_w_d, "V4SiV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vsrlni_b_h, "V16cV16cV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrlni_h_w, "V8sV8sV8sIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrlni_w_d, "V4iV4iV4iIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrlni_d_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vsrani_b_h, "V16cV16cV16cIUi", "nc", "lsx") 
+TARGET_BUILTIN(__builtin_lsx_vsrani_h_w, "V8sV8sV8sIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrani_w_d, "V4iV4iV4iIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrani_d_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vsrlrn_b_h, "V16ScV8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrlrn_h_w, "V8sV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrlrn_w_d, "V4SiV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vsrarn_b_h, "V16ScV8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrarn_h_w, "V8sV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrarn_w_d, "V4SiV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vsrlrni_b_h, "V16cV16cV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrlrni_h_w, "V8sV8sV8sIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrlrni_w_d, "V4iV4iV4iIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrlrni_d_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vsrarni_b_h, "V16cV16cV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrarni_h_w, "V8sV8sV8sIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrarni_w_d, "V4iV4iV4iIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsrarni_d_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vssrln_b_h, "V16ScV8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrln_h_w, "V8sV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrln_w_d, "V4SiV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vssran_b_h, "V16ScV8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssran_h_w, "V8sV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssran_w_d, "V4SiV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vssrln_bu_h, "V16UcV8UsV8Us", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrln_hu_w, "V8UsV4UiV4Ui", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrln_wu_d, "V4UiV2ULLiV2ULLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vssran_bu_h, "V16UcV8UsV8Us", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssran_hu_w, "V8UsV4UiV4Ui", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssran_wu_d, "V4UiV2ULLiV2ULLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vssrlni_b_h, "V16cV16cV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrlni_h_w, "V8sV8sV8sIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrlni_w_d, "V4iV4iV4iIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrlni_d_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vssrani_b_h, "V16cV16cV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrani_h_w, "V8sV8sV8sIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrani_w_d, "V4iV4iV4iIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrani_d_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vssrlrni_bu_h, "V16cV16cV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrlrni_hu_w, "V8sV8sV8sIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrlrni_wu_d, "V4iV4iV4iIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrlrni_du_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vssrani_bu_h, "V16cV16cV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrani_hu_w, "V8sV8sV8sIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrani_wu_d, "V4iV4iV4iIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrani_du_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vssrlrn_b_h, "V16ScV8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrlrn_h_w, "V8sV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrlrn_w_d, "V4SiV2LLiV2LLi", "nc", "lsx") + 
+TARGET_BUILTIN(__builtin_lsx_vssrarn_b_h, "V16ScV8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrarn_h_w, "V8sV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrarn_w_d, "V4SiV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vssrlrn_bu_h, "V16UcV8UsV8Us", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrlrn_hu_w, "V8UsV4UiV4Ui", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrlrn_wu_d, "V4UiV2ULLiV2ULLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vssrarn_bu_h, "V16UcV8UsV8Us", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrarn_hu_w, "V8UsV4UiV4Ui", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrarn_wu_d, "V4UiV2ULLiV2ULLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vssrlrni_b_h, "V16cV16cV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrlrni_h_w, "V8sV8sV8sIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrlrni_w_d, "V4iV4iV4iIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrlrni_d_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vssrarni_b_h, "V16cV16cV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrarni_h_w, "V8sV8sV8sIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrarni_w_d, "V4iV4iV4iIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrarni_d_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vssrlni_bu_h, "V16cV16cV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrlni_hu_w, "V8sV8sV8sIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrlni_wu_d, "V4iV4iV4iIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrlni_du_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vssrarni_bu_h, "V16cV16cV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrarni_hu_w, "V8sV8sV8sIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrarni_wu_d, "V4iV4iV4iIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vssrarni_du_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vclo_b, "V16ScV16Sc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vclo_h, "V8SsV8Ss", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vclo_w, "V4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vclo_d, "V2SLLiV2SLLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vclz_b, "V16ScV16Sc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vclz_h, "V8SsV8Ss", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vclz_w, "V4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vclz_d, "V2SLLiV2SLLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vpcnt_b, "V16ScV16Sc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vpcnt_h, "V8SsV8Ss", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vpcnt_w, "V4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vpcnt_d, "V2SLLiV2SLLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vbitclr_b, "V16UcV16UcV16Uc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vbitclr_h, "V8UsV8UsV8Us", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vbitclr_w, "V4UiV4UiV4Ui", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vbitclr_d, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vbitclri_b, "V16UcV16UcIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vbitclri_h, "V8UsV8UsIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vbitclri_w, "V4UiV4UiIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vbitclri_d, "V2ULLiV2ULLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vbitset_b, "V16UcV16UcV16Uc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vbitset_h, "V8UsV8UsV8Us", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vbitset_w, "V4UiV4UiV4Ui", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vbitset_d, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") + 
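The bit-manipulation builtins above (vbitclr/vbitset, with the vbitrev forms following) interpret each element of the second operand as a per-lane bit index, taken modulo the lane width. A hedged sketch of the register (non-immediate) form, under the same kind of typedef assumption as before ('v16u8' and 'set_bits' are illustrative names):

    typedef unsigned char v16u8 __attribute__((vector_size(16), aligned(16)));

    /* Set bit (idx[i] % 8) in each byte lane. Because the indices come from a
       vector register, this is the 'V16Uc' form rather than the 'IUi' form. */
    v16u8 set_bits(v16u8 v, v16u8 idx) { return __builtin_lsx_vbitset_b(v, idx); }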
+TARGET_BUILTIN(__builtin_lsx_vbitseti_b, "V16UcV16UcIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vbitseti_h, "V8UsV8UsIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vbitseti_w, "V4UiV4UiIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vbitseti_d, "V2ULLiV2ULLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vbitrev_b, "V16UcV16UcV16Uc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vbitrev_h, "V8UsV8UsV8Us", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vbitrev_w, "V4UiV4UiV4Ui", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vbitrev_d, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vbitrevi_b, "V16UcV16UcIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vbitrevi_h, "V8UsV8UsIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vbitrevi_w, "V4UiV4UiIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vbitrevi_d, "V2ULLiV2ULLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfrstp_b, "V16ScV16ScV16ScV16Sc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfrstp_h, "V8SsV8SsV8SsV8Ss", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfrstpi_b, "V16cV16cV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfrstpi_h, "V8sV8sV8sIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfadd_s, "V4fV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfadd_d, "V2dV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfsub_s, "V4fV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfsub_d, "V2dV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfmul_s, "V4fV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfmul_d, "V2dV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfdiv_s, "V4fV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfdiv_d, "V2dV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfmadd_s, "V4fV4fV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfmadd_d, "V2dV2dV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfmsub_s, "V4fV4fV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfmsub_d, "V2dV2dV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfnmadd_s, "V4fV4fV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfnmadd_d, "V2dV2dV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfnmsub_s, "V4fV4fV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfnmsub_d, "V2dV2dV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfmax_s, "V4fV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfmax_d, "V2dV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfmin_s, "V4fV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfmin_d, "V2dV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfmaxa_s, "V4fV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfmaxa_d, "V2dV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfmina_s, "V4fV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfmina_d, "V2dV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vflogb_s, "V4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vflogb_d, "V2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfclass_s, "V4iV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfclass_d, "V2LLiV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfsqrt_s, "V4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfsqrt_d, "V2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfrecip_s, "V4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfrecip_d, "V2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfrsqrt_s, "V4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfrsqrt_d, "V2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfcvtl_s_h, "V4fV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfcvtl_d_s, "V2dV4f", 
"nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfcvth_s_h, "V4fV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfcvth_d_s, "V2dV4f", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfcvt_h_s, "V8sV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfcvt_s_d, "V4fV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfrintrne_s, "V4SiV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfrintrne_d, "V2LLiV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfrintrz_s, "V4SiV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfrintrz_d, "V2LLiV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfrintrp_s, "V4SiV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfrintrp_d, "V2LLiV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfrintrm_s, "V4SiV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfrintrm_d, "V2LLiV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfrint_s, "V4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfrint_d, "V2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vftintrne_w_s, "V4SiV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vftintrne_l_d, "V2LLiV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vftintrz_w_s, "V4SiV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vftintrz_l_d, "V2LLiV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vftintrp_w_s, "V4SiV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vftintrp_l_d, "V2LLiV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vftintrm_w_s, "V4SiV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vftintrm_l_d, "V2LLiV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vftint_w_s, "V4SiV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vftint_l_d, "V2SLLiV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vftintrz_wu_s, "V4UiV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vftintrz_lu_d, "V2ULLiV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vftint_wu_s, "V4UiV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vftint_lu_d, "V2ULLiV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vftintrne_w_d, "V4SiV2dV2d", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vftintrz_w_d, "V4SiV2dV2d", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vftintrp_w_d, "V4SiV2dV2d", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vftintrm_w_d, "V4SiV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vftint_w_d, "V4SiV2dV2d", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vftintrnel_l_s, "V2LLiV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vftintrneh_l_s, "V2LLiV4f", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vftintrzl_l_s, "V2LLiV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vftintrzh_l_s, "V2LLiV4f", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vftintrpl_l_s, "V2LLiV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vftintrph_l_s, "V2LLiV4f", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vftintrml_l_s, "V2LLiV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vftintrmh_l_s, "V2LLiV4f", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vftintl_l_s, "V2LLiV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vftinth_l_s, "V2LLiV4f", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vffint_s_w, "V4fV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vffint_d_l, "V2dV2SLLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vffint_s_wu, "V4fV4Ui", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vffint_d_lu, "V2dV2ULLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vffintl_d_w, "V2dV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vffinth_d_w, "V2dV4Si", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vffint_s_l, "V4fV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vseq_b, "V16ScV16ScV16Sc", "nc", "lsx") 
+TARGET_BUILTIN(__builtin_lsx_vseq_h, "V8SsV8SsV8Ss", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vseq_w, "V4SiV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vseq_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vseqi_b, "V16ScV16ScISi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vseqi_h, "V8SsV8SsISi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vseqi_w, "V4SiV4SiISi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vseqi_d, "V2SLLiV2SLLiISi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vsle_b, "V16ScV16ScV16Sc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsle_h, "V8SsV8SsV8Ss", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsle_w, "V4SiV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsle_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vslei_b, "V16ScV16ScISi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vslei_h, "V8SsV8SsISi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vslei_w, "V4SiV4SiISi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vslei_d, "V2SLLiV2SLLiISi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vsle_bu, "V16ScV16UcV16Uc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsle_hu, "V8SsV8UsV8Us", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsle_wu, "V4SiV4UiV4Ui", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vsle_du, "V2SLLiV2ULLiV2ULLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vslei_bu, "V16ScV16UcIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vslei_hu, "V8SsV8UsIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vslei_wu, "V4SiV4UiIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vslei_du, "V2SLLiV2ULLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vslt_b, "V16ScV16ScV16Sc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vslt_h, "V8SsV8SsV8Ss", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vslt_w, "V4SiV4SiV4Si", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vslt_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vslti_b, "V16ScV16ScISi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vslti_h, "V8SsV8SsISi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vslti_w, "V4SiV4SiISi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vslti_d, "V2SLLiV2SLLiISi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vslt_bu, "V16ScV16UcV16Uc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vslt_hu, "V8SsV8UsV8Us", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vslt_wu, "V4SiV4UiV4Ui", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vslt_du, "V2SLLiV2ULLiV2ULLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vslti_bu, "V16ScV16UcIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vslti_hu, "V8SsV8UsIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vslti_wu, "V4SiV4UiIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vslti_du, "V2SLLiV2ULLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfcmp_caf_s, "V4SiV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfcmp_caf_d, "V2SLLiV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfcmp_cun_s, "V4SiV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfcmp_cun_d, "V2SLLiV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfcmp_ceq_s, "V4SiV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfcmp_ceq_d, "V2SLLiV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfcmp_cueq_s, "V4SiV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfcmp_cueq_d, "V2SLLiV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfcmp_clt_s, "V4SiV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfcmp_clt_d, "V2SLLiV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfcmp_cult_s, "V4SiV4fV4f", "nc", "lsx") 
+TARGET_BUILTIN(__builtin_lsx_vfcmp_cult_d, "V2SLLiV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfcmp_cle_s, "V4SiV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfcmp_cle_d, "V2SLLiV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfcmp_cule_s, "V4SiV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfcmp_cule_d, "V2SLLiV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfcmp_cne_s, "V4SiV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfcmp_cne_d, "V2SLLiV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfcmp_cor_s, "V4SiV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfcmp_cor_d, "V2SLLiV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfcmp_cune_s, "V4SiV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfcmp_cune_d, "V2SLLiV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfcmp_saf_s, "V4SiV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfcmp_saf_d, "V2SLLiV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfcmp_sun_s, "V4SiV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfcmp_sun_d, "V2SLLiV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfcmp_seq_s, "V4SiV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfcmp_seq_d, "V2SLLiV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfcmp_sueq_s, "V4SiV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfcmp_sueq_d, "V2SLLiV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfcmp_slt_s, "V4SiV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfcmp_slt_d, "V2SLLiV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfcmp_sult_s, "V4SiV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfcmp_sult_d, "V2SLLiV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfcmp_sle_s, "V4SiV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfcmp_sle_d, "V2SLLiV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfcmp_sule_s, "V4SiV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfcmp_sule_d, "V2SLLiV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfcmp_sne_s, "V4SiV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfcmp_sne_d, "V2SLLiV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfcmp_sor_s, "V4SiV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfcmp_sor_d, "V2SLLiV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vfcmp_sune_s, "V4SiV4fV4f", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vfcmp_sune_d, "V2SLLiV2dV2d", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vbitsel_v, "V16UcV16UcV16UcV16Uc", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vbitseli_b, "V16UcV16UcV16UcIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vreplgr2vr_b, "V16Sci", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vreplgr2vr_h, "V8Ssi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vreplgr2vr_w, "V4Sii", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vreplgr2vr_d, "V2SLLiLLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vinsgr2vr_b, "V16ScV16SciIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vinsgr2vr_h, "V8SsV8SsiIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vinsgr2vr_w, "V4SiV4SiiIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vinsgr2vr_d, "V2SLLiV2SLLiLLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vpickve2gr_b, "iV16ScIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vpickve2gr_h, "iV8SsIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vpickve2gr_w, "iV4SiIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vpickve2gr_d, "LLiV2SLLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vpickve2gr_bu, "iV16UcIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vpickve2gr_hu, "iV8UsIUi", "nc", "lsx") 
+TARGET_BUILTIN(__builtin_lsx_vpickve2gr_wu, "iV4UiIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vpickve2gr_du, "LLiV2ULLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vreplve_b, "V16cV16cUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vreplve_h, "V8sV8sUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vreplve_w, "V4iV4iUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vreplve_d, "V2LLiV2LLiUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vreplvei_b, "V16cV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vreplvei_h, "V8sV8sIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vreplvei_w, "V4iV4iIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vreplvei_d, "V2LLiV2LLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vbsll_v, "V16cV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vbsrl_v, "V16cV16cIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vpackev_b, "V16cV16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vpackev_h, "V8sV8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vpackev_w, "V4iV4iV4i", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vpackev_d, "V2LLiV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vpackod_b, "V16cV16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vpackod_h, "V8sV8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vpackod_w, "V4iV4iV4i", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vpackod_d, "V2LLiV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vpickev_b, "V16cV16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vpickev_h, "V8sV8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vpickev_w, "V4iV4iV4i", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vpickev_d, "V2LLiV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vpickod_b, "V16cV16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vpickod_h, "V8sV8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vpickod_w, "V4iV4iV4i", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vpickod_d, "V2LLiV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vilvl_b, "V16cV16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vilvl_h, "V8sV8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vilvl_w, "V4iV4iV4i", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vilvl_d, "V2LLiV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vilvh_b, "V16cV16cV16c", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vilvh_h, "V8sV8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vilvh_w, "V4iV4iV4i", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vilvh_d, "V2LLiV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vshuf_b, "V16UcV16UcV16UcV16Uc", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vshuf_h, "V8sV8sV8sV8s", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vshuf_w, "V4iV4iV4iV4i", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vshuf_d, "V2LLiV2LLiV2LLiV2LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vshuf4i_b, "V16cV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vshuf4i_h, "V8sV8sIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vshuf4i_w, "V4iV4iIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vshuf4i_d, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vpermi_w, "V4iV4iV4iIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vextrins_b, "V16cV16cV16cIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vextrins_h, "V8sV8sV8sIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vextrins_w, "V4iV4iV4iIUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vextrins_d, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vld, "V16ScvC*Ii", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vst, "vV16Scv*Ii", "nc", "lsx") 
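The vld/vst pair above takes a base pointer plus a byte offset ('Ii', a constant), while the vldx/vstx variants that follow take a 64-bit register offset ('LLi'). A small sketch combining the constant-offset forms, with the same illustrative typedef as in the earlier notes:

    typedef signed char v16i8 __attribute__((vector_size(16), aligned(16)));

    /* Copy one 16-byte chunk; the constant 0 offsets are folded into the
       load/store instructions' immediate offset fields. */
    void copy16(const void *src, void *dst) {
      v16i8 t = __builtin_lsx_vld(src, 0);
      __builtin_lsx_vst(t, dst, 0);
    }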
+ +TARGET_BUILTIN(__builtin_lsx_vldx, "V16ScvC*LLi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vstx, "vV16Scv*LLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vldrepl_b, "V16cvC*Ii", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vldrepl_h, "V8svC*Ii", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vldrepl_w, "V4ivC*Ii", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vldrepl_d, "V2LLivC*Ii", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_vstelm_b, "vV16Scv*IiUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vstelm_h, "vV8Ssv*IiUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vstelm_w, "vV4Siv*IiUi", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_vstelm_d, "vV2SLLiv*IiUi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_bz_v, "iV16Uc", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_bnz_v, "iV16Uc", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_bz_b, "iV16Uc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_bz_h, "iV8Us", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_bz_w, "iV4Ui", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_bz_d, "iV2ULLi", "nc", "lsx") + +TARGET_BUILTIN(__builtin_lsx_bnz_b, "iV16Uc", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_bnz_h, "iV8Us", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_bnz_w, "iV4Ui", "nc", "lsx") +TARGET_BUILTIN(__builtin_lsx_bnz_d, "iV2ULLi", "nc", "lsx") diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td index 7d13fcc769c57204e8e68103d4b2affe4437c8de..0c6391b4e8df2e5c1b242b7563cd96fd063c630a 100644 --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -691,4 +691,12 @@ def warn_drv_loongarch_conflicting_implied_val : Warning< InGroup; def err_drv_loongarch_invalid_mfpu_EQ : Error< "invalid argument '%0' to -mfpu=; must be one of: 64, 32, none, 0 (alias for none)">; +def err_drv_loongarch_wrong_fpu_width_for_lsx : Error< + "wrong fpu width; LSX depends on 64-bit FPU">; +def err_drv_loongarch_wrong_fpu_width_for_lasx : Error< + "wrong fpu width; LASX depends on 64-bit FPU">; +def err_drv_loongarch_invalid_simd_option_combination : Error< + "invalid option combination; LASX depends on LSX">; +def err_drv_loongarch_invalid_msimd_EQ : Error< + "invalid argument '%0' to -msimd=; must be one of: none, lsx, lasx">; } diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index a6240f32b0339b07473259a33da0ab9b54c28054..0b8450f3117d37940289c73dfcad37e2742aec49 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -3280,6 +3280,8 @@ def warn_objc_redundant_literal_use : Warning< def err_attr_tlsmodel_arg : Error<"tls_model must be \"global-dynamic\", " "\"local-dynamic\", \"initial-exec\" or \"local-exec\"">; +def err_attr_codemodel_arg : Error<"code model '%0' is not supported on this target">; + def err_aix_attr_unsupported_tls_model : Error<"TLS model '%0' is not yet supported on AIX">; def err_tls_var_aligned_over_maximum : Error< @@ -11659,9 +11661,4 @@ def err_non_designated_init_used : Error< def err_cast_from_randomized_struct : Error< "casting from randomized structure pointer type %0 to %1">; -// LoongArch-specific Diagnostics -def err_loongarch_builtin_requires_la64 : Error< - "this builtin requires target: loongarch64">; -def err_loongarch_builtin_requires_la32 : Error< - "this builtin requires target: loongarch32">; } // end of sema component.
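For context, a few hypothetical driver invocations that would exercise the new LoongArch SIMD diagnostics (assumed behavior inferred from the definitions above; 't.c' is a placeholder file name):

    clang --target=loongarch64 -mlsx -c t.c           # OK: enables LSX
    clang --target=loongarch64 -mfpu=32 -mlsx -c t.c  # error: wrong fpu width; LSX depends on 64-bit FPU
    clang --target=loongarch64 -mlasx -mno-lsx -c t.c # error: invalid option combination; LASX depends on LSX
    clang --target=loongarch64 -msimd=sse -c t.c      # error: invalid argument 'sse' to -msimd=

The -mlsx/-mno-lsx/-mlasx/-mno-lasx/-msimd= options themselves are added in the Options.td hunk that follows.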
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index ac89e170da94596e58a70f7e92e3a0d8dbc307ce..75f9f537b17d5b909f03489badb326a2dac1fc30 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -184,6 +184,8 @@ def m_x86_Features_Group : OptionGroup<"<m_x86 Features Group>">,
                            Group<m_Group>, Flags<[CoreOption]>, DocName<"X86">;
 def m_riscv_Features_Group : OptionGroup<"<m_riscv Features Group>">,
                              Group<m_Group>, DocName<"RISCV">;
+def m_loongarch_Features_Group : OptionGroup<"<m_loongarch Features Group>">,
+                                 Group<m_Group>, DocName<"LoongArch">;
 def m_libc_Group : OptionGroup<"<m_libc Group>">, Group<m_mips_Features_Group>,
                    Flags<[HelpHidden]>;
@@ -3844,6 +3846,16 @@ def mstack_protector_guard_reg_EQ : Joined<["-"], "mstack-protector-guard-reg=">
 def mfentry : Flag<["-"], "mfentry">, HelpText<"Insert calls to fentry at function entry (x86/SystemZ only)">,
   Flags<[CC1Option]>, Group<m_Group>, MarshallingInfoFlag<CodeGenOpts<"CallFEntry">>;
+def mlsx : Flag<["-"], "mlsx">, Group<m_loongarch_Features_Group>,
+  HelpText<"Enable Loongson SIMD Extension (LSX).">;
+def mno_lsx : Flag<["-"], "mno-lsx">, Group<m_loongarch_Features_Group>,
+  HelpText<"Disable Loongson SIMD Extension (LSX).">;
+def mlasx : Flag<["-"], "mlasx">, Group<m_loongarch_Features_Group>,
+  HelpText<"Enable Loongson Advanced SIMD Extension (LASX).">;
+def mno_lasx : Flag<["-"], "mno-lasx">, Group<m_loongarch_Features_Group>,
+  HelpText<"Disable Loongson Advanced SIMD Extension (LASX).">;
+def msimd_EQ : Joined<["-"], "msimd=">, Group<m_loongarch_Features_Group>,
+  HelpText<"Select the SIMD extension(s) to be enabled in LoongArch: one of 'none', 'lsx', or 'lasx'.">;
 def mnop_mcount : Flag<["-"], "mnop-mcount">, HelpText<"Generate mcount/__fentry__ calls as nops. To activate they need to be patched in.">,
   Flags<[CC1Option]>, Group<m_Group>, MarshallingInfoFlag<CodeGenOpts<"MNopMCount">>;
diff --git a/clang/lib/Basic/Targets/LoongArch.cpp b/clang/lib/Basic/Targets/LoongArch.cpp
index f755d8962973e0acffc0915dc16331d76ea666df..9cedc675764f0e2f8095a4dafebeceba19c089a6 100644
--- a/clang/lib/Basic/Targets/LoongArch.cpp
+++ b/clang/lib/Basic/Targets/LoongArch.cpp
@@ -34,7 +34,17 @@ ArrayRef<const char *> LoongArchTargetInfo::getGCCRegNames() const {
       "$f19", "$f20", "$f21", "$f22", "$f23", "$f24", "$f25", "$f26", "$f27",
       "$f28", "$f29", "$f30", "$f31",
       // Condition flag registers.
-      "$fcc0", "$fcc1", "$fcc2", "$fcc3", "$fcc4", "$fcc5", "$fcc6", "$fcc7"};
+      "$fcc0", "$fcc1", "$fcc2", "$fcc3", "$fcc4", "$fcc5", "$fcc6", "$fcc7",
+      // 128-bit vector registers.
+      "$vr0", "$vr1", "$vr2", "$vr3", "$vr4", "$vr5", "$vr6", "$vr7", "$vr8",
+      "$vr9", "$vr10", "$vr11", "$vr12", "$vr13", "$vr14", "$vr15", "$vr16",
+      "$vr17", "$vr18", "$vr19", "$vr20", "$vr21", "$vr22", "$vr23", "$vr24",
+      "$vr25", "$vr26", "$vr27", "$vr28", "$vr29", "$vr30", "$vr31",
+      // 256-bit vector registers.
+ "$xr0", "$xr1", "$xr2", "$xr3", "$xr4", "$xr5", "$xr6", "$xr7", "$xr8", + "$xr9", "$xr10", "$xr11", "$xr12", "$xr13", "$xr14", "$xr15", "$xr16", + "$xr17", "$xr18", "$xr19", "$xr20", "$xr21", "$xr22", "$xr23", "$xr24", + "$xr25", "$xr26", "$xr27", "$xr28", "$xr29", "$xr30", "$xr31"}; return llvm::makeArrayRef(GCCRegNames); } @@ -199,6 +209,15 @@ void LoongArchTargetInfo::getTargetDefines(const LangOptions &Opts, TuneCPU = ArchName; Builder.defineMacro("__loongarch_tune", Twine('"') + TuneCPU + Twine('"')); + if (HasFeatureLASX) { + Builder.defineMacro("__loongarch_simd_width", "256"); + Builder.defineMacro("__loongarch_sx", Twine(1)); + Builder.defineMacro("__loongarch_asx", Twine(1)); + } else if (HasFeatureLSX) { + Builder.defineMacro("__loongarch_simd_width", "128"); + Builder.defineMacro("__loongarch_sx", Twine(1)); + } + StringRef ABI = getABI(); if (ABI == "lp64d" || ABI == "lp64f" || ABI == "lp64s") Builder.defineMacro("__loongarch_lp64"); @@ -224,7 +243,7 @@ static constexpr Builtin::Info BuiltinInfo[] = { #define BUILTIN(ID, TYPE, ATTRS) \ {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr}, #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \ - {#ID, TYPE, ATTRS, FEATURE, ALL_LANGUAGES, nullptr}, + {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE}, #include "clang/Basic/BuiltinsLoongArch.def" }; @@ -248,6 +267,8 @@ bool LoongArchTargetInfo::hasFeature(StringRef Feature) const { .Case("loongarch64", Is64Bit) .Case("32bit", !Is64Bit) .Case("64bit", Is64Bit) + .Case("lsx", HasFeatureLSX) + .Case("lasx", HasFeatureLASX) .Default(false); } @@ -265,7 +286,10 @@ bool LoongArchTargetInfo::handleTargetFeatures( if (Feature == "+d") { HasFeatureD = true; } - } + } else if (Feature == "+lsx") + HasFeatureLSX = true; + else if (Feature == "+lasx") + HasFeatureLASX = true; } return true; } diff --git a/clang/lib/Basic/Targets/LoongArch.h b/clang/lib/Basic/Targets/LoongArch.h index 46ce56b4382230650627169981a407174281944d..c42ffc4fedbb0dfcfb4a9b604d5af65dd3f233c3 100644 --- a/clang/lib/Basic/Targets/LoongArch.h +++ b/clang/lib/Basic/Targets/LoongArch.h @@ -27,15 +27,20 @@ protected: std::string CPU; bool HasFeatureD; bool HasFeatureF; + bool HasFeatureLSX; + bool HasFeatureLASX; public: LoongArchTargetInfo(const llvm::Triple &Triple, const TargetOptions &) : TargetInfo(Triple) { HasFeatureD = false; HasFeatureF = false; + HasFeatureLSX = false; + HasFeatureLASX = false; LongDoubleWidth = 128; LongDoubleAlign = 128; LongDoubleFormat = &llvm::APFloat::IEEEquad(); + MCountName = "_mcount"; SuitableAlign = 128; WCharType = SignedInt; WIntType = UnsignedInt; diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 2de87b4a840da90c638ff77f565d3bb0858d4b84..113c629bf9edca945951f5f88c83cd874a256ea7 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -41,7 +41,6 @@ #include "llvm/IR/IntrinsicsARM.h" #include "llvm/IR/IntrinsicsBPF.h" #include "llvm/IR/IntrinsicsHexagon.h" -#include "llvm/IR/IntrinsicsLoongArch.h" #include "llvm/IR/IntrinsicsNVPTX.h" #include "llvm/IR/IntrinsicsPowerPC.h" #include "llvm/IR/IntrinsicsR600.h" @@ -5434,9 +5433,6 @@ static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF, case llvm::Triple::riscv32: case llvm::Triple::riscv64: return CGF->EmitRISCVBuiltinExpr(BuiltinID, E, ReturnValue); - case llvm::Triple::loongarch32: - case llvm::Triple::loongarch64: - return CGF->EmitLoongArchBuiltinExpr(BuiltinID, E); default: return nullptr; } @@ -19411,129 +19407,3 @@ Value 
*CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID, llvm::Function *F = CGM.getIntrinsic(ID, IntrinsicTypes); return Builder.CreateCall(F, Ops, ""); } - -Value *CodeGenFunction::EmitLoongArchBuiltinExpr(unsigned BuiltinID, - const CallExpr *E) { - SmallVector Ops; - - for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) - Ops.push_back(EmitScalarExpr(E->getArg(i))); - - Intrinsic::ID ID = Intrinsic::not_intrinsic; - - switch (BuiltinID) { - default: - llvm_unreachable("unexpected builtin ID."); - case LoongArch::BI__builtin_loongarch_cacop_d: - ID = Intrinsic::loongarch_cacop_d; - break; - case LoongArch::BI__builtin_loongarch_cacop_w: - ID = Intrinsic::loongarch_cacop_w; - break; - case LoongArch::BI__builtin_loongarch_dbar: - ID = Intrinsic::loongarch_dbar; - break; - case LoongArch::BI__builtin_loongarch_break: - ID = Intrinsic::loongarch_break; - break; - case LoongArch::BI__builtin_loongarch_ibar: - ID = Intrinsic::loongarch_ibar; - break; - case LoongArch::BI__builtin_loongarch_movfcsr2gr: - ID = Intrinsic::loongarch_movfcsr2gr; - break; - case LoongArch::BI__builtin_loongarch_movgr2fcsr: - ID = Intrinsic::loongarch_movgr2fcsr; - break; - case LoongArch::BI__builtin_loongarch_syscall: - ID = Intrinsic::loongarch_syscall; - break; - case LoongArch::BI__builtin_loongarch_crc_w_b_w: - ID = Intrinsic::loongarch_crc_w_b_w; - break; - case LoongArch::BI__builtin_loongarch_crc_w_h_w: - ID = Intrinsic::loongarch_crc_w_h_w; - break; - case LoongArch::BI__builtin_loongarch_crc_w_w_w: - ID = Intrinsic::loongarch_crc_w_w_w; - break; - case LoongArch::BI__builtin_loongarch_crc_w_d_w: - ID = Intrinsic::loongarch_crc_w_d_w; - break; - case LoongArch::BI__builtin_loongarch_crcc_w_b_w: - ID = Intrinsic::loongarch_crcc_w_b_w; - break; - case LoongArch::BI__builtin_loongarch_crcc_w_h_w: - ID = Intrinsic::loongarch_crcc_w_h_w; - break; - case LoongArch::BI__builtin_loongarch_crcc_w_w_w: - ID = Intrinsic::loongarch_crcc_w_w_w; - break; - case LoongArch::BI__builtin_loongarch_crcc_w_d_w: - ID = Intrinsic::loongarch_crcc_w_d_w; - break; - case LoongArch::BI__builtin_loongarch_csrrd_w: - ID = Intrinsic::loongarch_csrrd_w; - break; - case LoongArch::BI__builtin_loongarch_csrwr_w: - ID = Intrinsic::loongarch_csrwr_w; - break; - case LoongArch::BI__builtin_loongarch_csrxchg_w: - ID = Intrinsic::loongarch_csrxchg_w; - break; - case LoongArch::BI__builtin_loongarch_csrrd_d: - ID = Intrinsic::loongarch_csrrd_d; - break; - case LoongArch::BI__builtin_loongarch_csrwr_d: - ID = Intrinsic::loongarch_csrwr_d; - break; - case LoongArch::BI__builtin_loongarch_csrxchg_d: - ID = Intrinsic::loongarch_csrxchg_d; - break; - case LoongArch::BI__builtin_loongarch_iocsrrd_b: - ID = Intrinsic::loongarch_iocsrrd_b; - break; - case LoongArch::BI__builtin_loongarch_iocsrrd_h: - ID = Intrinsic::loongarch_iocsrrd_h; - break; - case LoongArch::BI__builtin_loongarch_iocsrrd_w: - ID = Intrinsic::loongarch_iocsrrd_w; - break; - case LoongArch::BI__builtin_loongarch_iocsrrd_d: - ID = Intrinsic::loongarch_iocsrrd_d; - break; - case LoongArch::BI__builtin_loongarch_iocsrwr_b: - ID = Intrinsic::loongarch_iocsrwr_b; - break; - case LoongArch::BI__builtin_loongarch_iocsrwr_h: - ID = Intrinsic::loongarch_iocsrwr_h; - break; - case LoongArch::BI__builtin_loongarch_iocsrwr_w: - ID = Intrinsic::loongarch_iocsrwr_w; - break; - case LoongArch::BI__builtin_loongarch_iocsrwr_d: - ID = Intrinsic::loongarch_iocsrwr_d; - break; - case LoongArch::BI__builtin_loongarch_cpucfg: - ID = Intrinsic::loongarch_cpucfg; - break; - case 
LoongArch::BI__builtin_loongarch_asrtle_d:
-    ID = Intrinsic::loongarch_asrtle_d;
-    break;
-  case LoongArch::BI__builtin_loongarch_asrtgt_d:
-    ID = Intrinsic::loongarch_asrtgt_d;
-    break;
-  case LoongArch::BI__builtin_loongarch_lddir_d:
-    ID = Intrinsic::loongarch_lddir_d;
-    break;
-  case LoongArch::BI__builtin_loongarch_ldpte_d:
-    ID = Intrinsic::loongarch_ldpte_d;
-    break;
-    // TODO: Support more Intrinsics.
-  }
-
-  assert(ID != Intrinsic::not_intrinsic);
-
-  llvm::Function *F = CGM.getIntrinsic(ID);
-  return Builder.CreateCall(F, Ops);
-}
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index f9096d05ecde83d572c4ca386b7c288d982baa23..672acd844525ba423cbf13d07de5f0873941c790 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -4252,7 +4252,6 @@ public:
   llvm::Value *EmitHexagonBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
   llvm::Value *EmitRISCVBuiltinExpr(unsigned BuiltinID, const CallExpr *E,
                                     ReturnValueSlot ReturnValue);
-  llvm::Value *EmitLoongArchBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
   bool ProcessOrderScopeAMDGCN(llvm::Value *Order, llvm::Value *Scope,
                                llvm::AtomicOrdering &AO,
                                llvm::SyncScope::ID &SSID);
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index 173437e231b3e8d4d6a9edf3cb7bab0a061abeb9..59bd9fb04cd70a37597dd32d094ca2c2b6d6821c 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -4351,6 +4351,10 @@ CodeGenModule::GetOrCreateLLVMGlobal(StringRef MangledName, llvm::Type *Ty,
       isExternallyVisible(D->getLinkageAndVisibility().getLinkage()))
     GV->setSection(".cp.rodata");
 
+  // Handle the code model attribute.
+  if (const auto *CMA = D->getAttr<CodeModelAttr>())
+    GV->setCodeModel(CMA->getModel());
+
   // Check if we a have a const declaration with an initializer, we may be
   // able to emit it as available_externally to expose it's value to the
   // optimizer.
diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp
index dabbd516b820e14269c9fdb153445174d7f7453c..0dea77873b7c5ae765e350bf6bd736870f49059d 100644
--- a/clang/lib/CodeGen/TargetInfo.cpp
+++ b/clang/lib/CodeGen/TargetInfo.cpp
@@ -11803,10 +11803,11 @@ bool LoongArchABIInfo::detectFARsEligibleStructHelper(
   // copy constructor are not eligible for the FP calling convention.
   if (getRecordArgABI(Ty, CGT.getCXXABI()))
     return false;
-  if (isEmptyRecord(getContext(), Ty, true, true))
-    return true;
   const RecordDecl *RD = RTy->getDecl();
-  // Unions aren't eligible unless they're empty (which is caught above).
+  if (isEmptyRecord(getContext(), Ty, true, true) &&
+      (!RD->isUnion() || !isa<CXXRecordDecl>(RD)))
+    return true;
+  // Unions aren't eligible unless they're empty in C (which is caught above).
   if (RD->isUnion())
     return false;
   const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD);
@@ -11941,12 +11942,14 @@ ABIArgInfo LoongArchABIInfo::classifyArgumentType(QualType Ty, bool IsFixed,
                                               CGCXXABI::RAA_DirectInMemory);
   }
 
-  // Ignore empty structs/unions.
-  if (isEmptyRecord(getContext(), Ty, true))
-    return ABIArgInfo::getIgnore();
-
   uint64_t Size = getContext().getTypeSize(Ty);
 
+  // Ignore an empty struct or union whose size is zero, e.g. `struct { }` in C
+  // or `struct { int a[0]; }` in C++. In C++, `struct { }` is empty but its
+  // size is 1 byte and g++ doesn't ignore it; clang++ matches this behaviour.
+  if (isEmptyRecord(getContext(), Ty, true) && Size == 0)
+    return ABIArgInfo::getIgnore();
+
   // Pass floating point values via FARs if possible.
+  if (IsFixed && Ty->isFloatingType() && !Ty->isComplexType() &&
+      FRLen >= Size && FARsLeft) {
diff --git a/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp b/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp
index 745c36ded5a60a91e8bde6e6805689d4b55fba56..940e18a5635255fd700d5a84e2207d7ae285f670 100644
--- a/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp
+++ b/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp
@@ -169,6 +169,67 @@ void loongarch::getLoongArchTargetFeatures(const Driver &D,
   // or the alias -m[no-]strict-align.
   AddTargetFeature(Args, Features, options::OPT_munaligned_access,
                    options::OPT_mno_unaligned_access, "ual");
+
+  // Select the lsx feature as determined by -m[no-]lsx.
+  if (const Arg *A = Args.getLastArg(options::OPT_mlsx, options::OPT_mno_lsx)) {
+    // LSX depends on the 64-bit FPU:
+    // -m*-float and -mfpu=none/0/32 conflict with -mlsx.
+    if (A->getOption().matches(options::OPT_mlsx)) {
+      if (llvm::find(Features, "-d") != Features.end())
+        D.Diag(diag::err_drv_loongarch_wrong_fpu_width_for_lsx);
+      else /*-mlsx*/
+        Features.push_back("+lsx");
+    } else /*-mno-lsx*/ {
+      Features.push_back("-lsx");
+    }
+  }
+
+  // Select the lasx feature as determined by -m[no-]lasx.
+  if (const Arg *A =
+          Args.getLastArg(options::OPT_mlasx, options::OPT_mno_lasx)) {
+    // LASX depends on the 64-bit FPU and LSX:
+    // -mno-lsx conflicts with -mlasx.
+    if (A->getOption().matches(options::OPT_mlasx)) {
+      if (llvm::find(Features, "-d") != Features.end())
+        D.Diag(diag::err_drv_loongarch_wrong_fpu_width_for_lasx);
+      else if (llvm::find(Features, "-lsx") != Features.end())
+        D.Diag(diag::err_drv_loongarch_invalid_simd_option_combination);
+      else { /*-mlasx*/
+        Features.push_back("+lsx");
+        Features.push_back("+lasx");
+      }
+    } else /*-mno-lasx*/
+      Features.push_back("-lasx");
+  }
+
+  // Select the lsx/lasx feature as determined by -msimd=.
+  // Option -msimd= has lower priority than -m[no-]lsx and -m[no-]lasx.
+  if (const Arg *A = Args.getLastArg(options::OPT_msimd_EQ)) {
+    StringRef MSIMD = A->getValue();
+    if (MSIMD == "lsx") {
+      // Option -msimd=lsx depends on the 64-bit FPU:
+      // -m*-float and -mfpu=none/0/32 conflict with -msimd=lsx.
+      if (llvm::find(Features, "-d") != Features.end())
+        D.Diag(diag::err_drv_loongarch_wrong_fpu_width_for_lsx);
+      // The previous options did not add the feature -lsx.
+      else if (llvm::find(Features, "-lsx") == Features.end())
+        Features.push_back("+lsx");
+    } else if (MSIMD == "lasx") {
+      // Option -msimd=lasx depends on the 64-bit FPU and LSX:
+      // -m*-float, -mfpu=none/0/32 and -mno-lsx conflict with -msimd=lasx.
+      if (llvm::find(Features, "-d") != Features.end())
+        D.Diag(diag::err_drv_loongarch_wrong_fpu_width_for_lasx);
+      else if (llvm::find(Features, "-lsx") != Features.end())
+        D.Diag(diag::err_drv_loongarch_invalid_simd_option_combination);
+      // The previous options did not add the feature -lasx.
+      else if (llvm::find(Features, "-lasx") == Features.end()) {
+        Features.push_back("+lsx");
+        Features.push_back("+lasx");
+      }
+    } else if (MSIMD != "none") {
+      D.Diag(diag::err_drv_loongarch_invalid_msimd_EQ) << MSIMD;
+    }
+  }
 }
 
 std::string loongarch::postProcessTargetCPUString(const std::string &CPU,
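A note on the precedence encoded above: -m[no-]lsx and -m[no-]lasx are processed first, so -msimd= only adds features that an explicit flag has not already negated; for example, `-mlasx -msimd=none` still enables LASX. As a hedged sketch (the helper name is invented; the macros come from the Targets/LoongArch.cpp change earlier in this patch), the selected level can be observed from the preprocessor:

#include <stdio.h>

/* demo_simd_level is an invented helper; compile e.g. with
 * `clang --target=loongarch64-unknown-linux-gnu -mlasx demo.c`. */
static const char *demo_simd_level(void) {
#if defined(__loongarch_asx)
  return "lasx"; /* __loongarch_simd_width == 256 */
#elif defined(__loongarch_sx)
  return "lsx";  /* __loongarch_simd_width == 128 */
#else
  return "none";
#endif
}

int main(void) { puts(demo_simd_level()); return 0; }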
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index 93001072a32a104d94042c701585238607febfbd..1697461b5dc9e9d67f2a2bfe68c7dc8d4d943498 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -5425,10 +5425,15 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
   // defaults to -fno-direct-access-external-data. Pass the option if different
   // from the default.
   if (Arg *A = Args.getLastArg(options::OPT_fdirect_access_external_data,
-                               options::OPT_fno_direct_access_external_data))
+                               options::OPT_fno_direct_access_external_data)) {
     if (A->getOption().matches(options::OPT_fdirect_access_external_data) !=
         (PICLevel == 0))
       A->render(Args, CmdArgs);
+  } else if (PICLevel == 0 && Triple.isLoongArch()) {
+    // Some targets default to -fno-direct-access-external-data even for
+    // -fno-pic.
+    CmdArgs.push_back("-fno-direct-access-external-data");
+  }
 
   if (Args.hasFlag(options::OPT_fno_plt, options::OPT_fplt, false)) {
     CmdArgs.push_back("-fno-plt");
@@ -5481,15 +5486,35 @@
   if (Arg *A = Args.getLastArg(options::OPT_mcmodel_EQ)) {
     StringRef CM = A->getValue();
-    if (CM == "small" || CM == "kernel" || CM == "medium" || CM == "large" ||
-        CM == "tiny") {
-      if (Triple.isOSAIX() && CM == "medium")
-        CmdArgs.push_back("-mcmodel=large");
-      else
-        A->render(Args, CmdArgs);
+    if (Triple.isLoongArch()) {
+      bool Ok = false;
+      if (CM == "extreme" &&
+          Args.hasFlagNoClaim(options::OPT_fplt, options::OPT_fno_plt, false))
+        D.Diag(diag::err_drv_argument_not_allowed_with)
+            << A->getAsString(Args) << "-fplt";
+      Ok = CM == "normal" || CM == "medium" || CM == "extreme";
+      // Convert to LLVM recognizable names.
+      if (Ok) {
+        CM = llvm::StringSwitch<StringRef>(CM)
+                 .Case("normal", "small")
+                 .Case("extreme", "large")
+                 .Default(CM);
+        CmdArgs.push_back(Args.MakeArgString("-mcmodel=" + CM));
+      } else {
+        D.Diag(diag::err_drv_invalid_argument_to_option)
+            << CM << A->getOption().getName();
+      }
     } else {
-      D.Diag(diag::err_drv_invalid_argument_to_option)
-          << CM << A->getOption().getName();
+      if (CM == "small" || CM == "kernel" || CM == "medium" || CM == "large" ||
+          CM == "tiny") {
+        if (Triple.isOSAIX() && CM == "medium")
+          CmdArgs.push_back("-mcmodel=large");
+        else
+          A->render(Args, CmdArgs);
+      } else {
+        D.Diag(diag::err_drv_invalid_argument_to_option)
+            << CM << A->getOption().getName();
+      }
     }
   }
diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
index 105f85af6171ac67c99e1155338bf862f8164f2c..036f5479b84a7a125bfa6931ee4179ca671951be 100644
--- a/clang/lib/Headers/CMakeLists.txt
+++ b/clang/lib/Headers/CMakeLists.txt
@@ -70,6 +70,8 @@ set(hlsl_files
 set(loongarch_files
   larchintrin.h
+  lasxintrin.h
+  lsxintrin.h
 )
 
 set(mips_msa_files
diff --git a/clang/lib/Headers/larchintrin.h b/clang/lib/Headers/larchintrin.h
index c5c533ee0b8c1d6e2372244fd8fc186e55a0409a..24dd29ce91ffb9a7f7982ad4b3310fc82d1568d2 100644
--- a/clang/lib/Headers/larchintrin.h
+++ b/clang/lib/Headers/larchintrin.h
@@ -156,7 +156,7 @@ extern __inline unsigned char
   return (unsigned char)__builtin_loongarch_iocsrrd_b((unsigned int)_1);
 }
 
-extern __inline unsigned char
+extern __inline unsigned short
     __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     __iocsrrd_h(unsigned int _1) {
   return (unsigned short)__builtin_loongarch_iocsrrd_h((unsigned int)_1);
diff --git a/clang/lib/Headers/lasxintrin.h b/clang/lib/Headers/lasxintrin.h
new file mode 100644
index 0000000000000000000000000000000000000000..6b4d5012a24b5893024424c6613265fbbe81c830
--- /dev/null
+++ b/clang/lib/Headers/lasxintrin.h
@@ -0,0 +1,3860 @@
+/*===------------ lasxintrin.h - LoongArch LASX intrinsics -----------------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef _LOONGSON_ASXINTRIN_H
+#define _LOONGSON_ASXINTRIN_H 1
+
+#if defined(__loongarch_asx)
+
+typedef signed char v32i8 __attribute__((vector_size(32), aligned(32)));
+typedef signed char v32i8_b __attribute__((vector_size(32), aligned(1)));
+typedef unsigned char v32u8 __attribute__((vector_size(32), aligned(32)));
+typedef unsigned char v32u8_b __attribute__((vector_size(32), aligned(1)));
+typedef short v16i16 __attribute__((vector_size(32), aligned(32)));
+typedef short v16i16_h __attribute__((vector_size(32), aligned(2)));
+typedef unsigned short v16u16 __attribute__((vector_size(32), aligned(32)));
+typedef unsigned short v16u16_h __attribute__((vector_size(32), aligned(2)));
+typedef int v8i32 __attribute__((vector_size(32), aligned(32)));
+typedef int v8i32_w __attribute__((vector_size(32), aligned(4)));
+typedef unsigned int v8u32 __attribute__((vector_size(32), aligned(32)));
+typedef unsigned int v8u32_w __attribute__((vector_size(32), aligned(4)));
+typedef long long v4i64 __attribute__((vector_size(32), aligned(32)));
+typedef long long v4i64_d __attribute__((vector_size(32), aligned(8)));
+typedef unsigned long long v4u64 __attribute__((vector_size(32), aligned(32)));
+typedef unsigned long long v4u64_d __attribute__((vector_size(32), aligned(8)));
+typedef float v8f32 __attribute__((vector_size(32), aligned(32)));
+typedef float v8f32_w __attribute__((vector_size(32), aligned(4)));
+typedef double v4f64 __attribute__((vector_size(32), aligned(32)));
+typedef double v4f64_d __attribute__((vector_size(32), aligned(8)));
+
+typedef float __m256 __attribute__((__vector_size__(32), __may_alias__));
+typedef long long __m256i __attribute__((__vector_size__(32), __may_alias__));
+typedef double __m256d __attribute__((__vector_size__(32), __may_alias__));
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvsll_b(__m256i _1, __m256i _2) {
+  return (__m256i)__builtin_lasx_xvsll_b((v32i8)_1, (v32i8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvsll_h(__m256i _1, __m256i _2) {
+  return (__m256i)__builtin_lasx_xvsll_h((v16i16)_1, (v16i16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvsll_w(__m256i _1, __m256i _2) {
+  return (__m256i)__builtin_lasx_xvsll_w((v8i32)_1, (v8i32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvsll_d(__m256i _1, __m256i _2) {
+  return (__m256i)__builtin_lasx_xvsll_d((v4i64)_1, (v4i64)_2);
+}
+
+#define __lasx_xvslli_b(/*__m256i*/ _1, /*ui3*/ _2) \
+  ((__m256i)__builtin_lasx_xvslli_b((v32i8)(_1), (_2)))
+
+#define __lasx_xvslli_h(/*__m256i*/ _1, /*ui4*/ _2) \
+  ((__m256i)__builtin_lasx_xvslli_h((v16i16)(_1), (_2)))
+
+#define __lasx_xvslli_w(/*__m256i*/ _1, /*ui5*/ _2) \
+  ((__m256i)__builtin_lasx_xvslli_w((v8i32)(_1), (_2)))
+
+#define __lasx_xvslli_d(/*__m256i*/ _1, /*ui6*/ _2) \
+  ((__m256i)__builtin_lasx_xvslli_d((v4i64)(_1), (_2)))
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvsra_b(__m256i _1, __m256i _2) {
+  return (__m256i)__builtin_lasx_xvsra_b((v32i8)_1, (v32i8)_2);
+}
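For orientation at this point in the header: user code stays in the generic __m256i type, and each wrapper casts to the lane-accurate v32i8/v16i16/v8i32/v4i64 forms only at the builtin call. A hedged usage sketch, not part of the header itself (`demo_scale_bytes` is an invented name; it assumes compilation with -mlasx so that __loongarch_asx is defined):

static __inline __m256i demo_scale_bytes(__m256i v, __m256i amounts) {
  /* Variable shift: each 8-bit lane of `amounts` supplies that lane's count. */
  __m256i t = __lasx_xvsll_b(v, amounts);
  /* Immediate form: additionally shift every 8-bit lane left by 2 (a ui3). */
  return __lasx_xvslli_b(t, 2);
}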
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvsra_h(__m256i _1, __m256i _2) {
+  return (__m256i)__builtin_lasx_xvsra_h((v16i16)_1, (v16i16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvsra_w(__m256i _1, __m256i _2) {
+  return (__m256i)__builtin_lasx_xvsra_w((v8i32)_1, (v8i32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvsra_d(__m256i _1, __m256i _2) {
+  return (__m256i)__builtin_lasx_xvsra_d((v4i64)_1, (v4i64)_2);
+}
+
+#define __lasx_xvsrai_b(/*__m256i*/ _1, /*ui3*/ _2) \
+  ((__m256i)__builtin_lasx_xvsrai_b((v32i8)(_1), (_2)))
+
+#define __lasx_xvsrai_h(/*__m256i*/ _1, /*ui4*/ _2) \
+  ((__m256i)__builtin_lasx_xvsrai_h((v16i16)(_1), (_2)))
+
+#define __lasx_xvsrai_w(/*__m256i*/ _1, /*ui5*/ _2) \
+  ((__m256i)__builtin_lasx_xvsrai_w((v8i32)(_1), (_2)))
+
+#define __lasx_xvsrai_d(/*__m256i*/ _1, /*ui6*/ _2) \
+  ((__m256i)__builtin_lasx_xvsrai_d((v4i64)(_1), (_2)))
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvsrar_b(__m256i _1, __m256i _2) {
+  return (__m256i)__builtin_lasx_xvsrar_b((v32i8)_1, (v32i8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvsrar_h(__m256i _1, __m256i _2) {
+  return (__m256i)__builtin_lasx_xvsrar_h((v16i16)_1, (v16i16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvsrar_w(__m256i _1, __m256i _2) {
+  return (__m256i)__builtin_lasx_xvsrar_w((v8i32)_1, (v8i32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvsrar_d(__m256i _1, __m256i _2) {
+  return (__m256i)__builtin_lasx_xvsrar_d((v4i64)_1, (v4i64)_2);
+}
+
+#define __lasx_xvsrari_b(/*__m256i*/ _1, /*ui3*/ _2) \
+  ((__m256i)__builtin_lasx_xvsrari_b((v32i8)(_1), (_2)))
+
+#define __lasx_xvsrari_h(/*__m256i*/ _1, /*ui4*/ _2) \
+  ((__m256i)__builtin_lasx_xvsrari_h((v16i16)(_1), (_2)))
+
+#define __lasx_xvsrari_w(/*__m256i*/ _1, /*ui5*/ _2) \
+  ((__m256i)__builtin_lasx_xvsrari_w((v8i32)(_1), (_2)))
+
+#define __lasx_xvsrari_d(/*__m256i*/ _1, /*ui6*/ _2) \
+  ((__m256i)__builtin_lasx_xvsrari_d((v4i64)(_1), (_2)))
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvsrl_b(__m256i _1, __m256i _2) {
+  return (__m256i)__builtin_lasx_xvsrl_b((v32i8)_1, (v32i8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvsrl_h(__m256i _1, __m256i _2) {
+  return (__m256i)__builtin_lasx_xvsrl_h((v16i16)_1, (v16i16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvsrl_w(__m256i _1, __m256i _2) {
+  return (__m256i)__builtin_lasx_xvsrl_w((v8i32)_1, (v8i32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i
+    __lasx_xvsrl_d(__m256i _1, __m256i _2) {
+  return (__m256i)__builtin_lasx_xvsrl_d((v4i64)_1, (v4i64)_2);
+}
+
+#define __lasx_xvsrli_b(/*__m256i*/ _1, /*ui3*/ _2) \
+  ((__m256i)__builtin_lasx_xvsrli_b((v32i8)(_1), (_2)))
+
+#define __lasx_xvsrli_h(/*__m256i*/ _1, /*ui4*/ _2) \
+  ((__m256i)__builtin_lasx_xvsrli_h((v16i16)(_1), (_2)))
+
+#define __lasx_xvsrli_w(/*__m256i*/ _1, /*ui5*/ _2) \
+  
((__m256i)__builtin_lasx_xvsrli_w((v8i32)(_1), (_2))) + +#define __lasx_xvsrli_d(/*__m256i*/ _1, /*ui6*/ _2) \ + ((__m256i)__builtin_lasx_xvsrli_d((v4i64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrlr_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrlr_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrlr_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrlr_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrlr_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrlr_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrlr_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrlr_d((v4i64)_1, (v4i64)_2); +} + +#define __lasx_xvsrlri_b(/*__m256i*/ _1, /*ui3*/ _2) \ + ((__m256i)__builtin_lasx_xvsrlri_b((v32i8)(_1), (_2))) + +#define __lasx_xvsrlri_h(/*__m256i*/ _1, /*ui4*/ _2) \ + ((__m256i)__builtin_lasx_xvsrlri_h((v16i16)(_1), (_2))) + +#define __lasx_xvsrlri_w(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvsrlri_w((v8i32)(_1), (_2))) + +#define __lasx_xvsrlri_d(/*__m256i*/ _1, /*ui6*/ _2) \ + ((__m256i)__builtin_lasx_xvsrlri_d((v4i64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvbitclr_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvbitclr_b((v32u8)_1, (v32u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvbitclr_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvbitclr_h((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvbitclr_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvbitclr_w((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvbitclr_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvbitclr_d((v4u64)_1, (v4u64)_2); +} + +#define __lasx_xvbitclri_b(/*__m256i*/ _1, /*ui3*/ _2) \ + ((__m256i)__builtin_lasx_xvbitclri_b((v32u8)(_1), (_2))) + +#define __lasx_xvbitclri_h(/*__m256i*/ _1, /*ui4*/ _2) \ + ((__m256i)__builtin_lasx_xvbitclri_h((v16u16)(_1), (_2))) + +#define __lasx_xvbitclri_w(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvbitclri_w((v8u32)(_1), (_2))) + +#define __lasx_xvbitclri_d(/*__m256i*/ _1, /*ui6*/ _2) \ + ((__m256i)__builtin_lasx_xvbitclri_d((v4u64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvbitset_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvbitset_b((v32u8)_1, (v32u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvbitset_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvbitset_h((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvbitset_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvbitset_w((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvbitset_d(__m256i _1, 
__m256i _2) { + return (__m256i)__builtin_lasx_xvbitset_d((v4u64)_1, (v4u64)_2); +} + +#define __lasx_xvbitseti_b(/*__m256i*/ _1, /*ui3*/ _2) \ + ((__m256i)__builtin_lasx_xvbitseti_b((v32u8)(_1), (_2))) + +#define __lasx_xvbitseti_h(/*__m256i*/ _1, /*ui4*/ _2) \ + ((__m256i)__builtin_lasx_xvbitseti_h((v16u16)(_1), (_2))) + +#define __lasx_xvbitseti_w(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvbitseti_w((v8u32)(_1), (_2))) + +#define __lasx_xvbitseti_d(/*__m256i*/ _1, /*ui6*/ _2) \ + ((__m256i)__builtin_lasx_xvbitseti_d((v4u64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvbitrev_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvbitrev_b((v32u8)_1, (v32u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvbitrev_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvbitrev_h((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvbitrev_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvbitrev_w((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvbitrev_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvbitrev_d((v4u64)_1, (v4u64)_2); +} + +#define __lasx_xvbitrevi_b(/*__m256i*/ _1, /*ui3*/ _2) \ + ((__m256i)__builtin_lasx_xvbitrevi_b((v32u8)(_1), (_2))) + +#define __lasx_xvbitrevi_h(/*__m256i*/ _1, /*ui4*/ _2) \ + ((__m256i)__builtin_lasx_xvbitrevi_h((v16u16)(_1), (_2))) + +#define __lasx_xvbitrevi_w(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvbitrevi_w((v8u32)(_1), (_2))) + +#define __lasx_xvbitrevi_d(/*__m256i*/ _1, /*ui6*/ _2) \ + ((__m256i)__builtin_lasx_xvbitrevi_d((v4u64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvadd_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvadd_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvadd_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvadd_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvadd_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvadd_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvadd_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvadd_d((v4i64)_1, (v4i64)_2); +} + +#define __lasx_xvaddi_bu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvaddi_bu((v32i8)(_1), (_2))) + +#define __lasx_xvaddi_hu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvaddi_hu((v16i16)(_1), (_2))) + +#define __lasx_xvaddi_wu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvaddi_wu((v8i32)(_1), (_2))) + +#define __lasx_xvaddi_du(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvaddi_du((v4i64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsub_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsub_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsub_h(__m256i _1, __m256i _2) { + return 
(__m256i)__builtin_lasx_xvsub_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsub_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsub_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsub_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsub_d((v4i64)_1, (v4i64)_2); +} + +#define __lasx_xvsubi_bu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvsubi_bu((v32i8)(_1), (_2))) + +#define __lasx_xvsubi_hu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvsubi_hu((v16i16)(_1), (_2))) + +#define __lasx_xvsubi_wu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvsubi_wu((v8i32)(_1), (_2))) + +#define __lasx_xvsubi_du(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvsubi_du((v4i64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmax_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmax_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmax_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmax_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmax_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmax_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmax_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmax_d((v4i64)_1, (v4i64)_2); +} + +#define __lasx_xvmaxi_b(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvmaxi_b((v32i8)(_1), (_2))) + +#define __lasx_xvmaxi_h(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvmaxi_h((v16i16)(_1), (_2))) + +#define __lasx_xvmaxi_w(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvmaxi_w((v8i32)(_1), (_2))) + +#define __lasx_xvmaxi_d(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvmaxi_d((v4i64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmax_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmax_bu((v32u8)_1, (v32u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmax_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmax_hu((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmax_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmax_wu((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmax_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmax_du((v4u64)_1, (v4u64)_2); +} + +#define __lasx_xvmaxi_bu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvmaxi_bu((v32u8)(_1), (_2))) + +#define __lasx_xvmaxi_hu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvmaxi_hu((v16u16)(_1), (_2))) + +#define __lasx_xvmaxi_wu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvmaxi_wu((v8u32)(_1), (_2))) + +#define __lasx_xvmaxi_du(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvmaxi_du((v4u64)(_1), (_2))) + +extern __inline + 
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmin_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmin_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmin_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmin_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmin_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmin_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmin_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmin_d((v4i64)_1, (v4i64)_2); +} + +#define __lasx_xvmini_b(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvmini_b((v32i8)(_1), (_2))) + +#define __lasx_xvmini_h(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvmini_h((v16i16)(_1), (_2))) + +#define __lasx_xvmini_w(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvmini_w((v8i32)(_1), (_2))) + +#define __lasx_xvmini_d(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvmini_d((v4i64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmin_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmin_bu((v32u8)_1, (v32u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmin_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmin_hu((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmin_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmin_wu((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmin_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmin_du((v4u64)_1, (v4u64)_2); +} + +#define __lasx_xvmini_bu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvmini_bu((v32u8)(_1), (_2))) + +#define __lasx_xvmini_hu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvmini_hu((v16u16)(_1), (_2))) + +#define __lasx_xvmini_wu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvmini_wu((v8u32)(_1), (_2))) + +#define __lasx_xvmini_du(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvmini_du((v4u64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvseq_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvseq_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvseq_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvseq_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvseq_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvseq_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvseq_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvseq_d((v4i64)_1, (v4i64)_2); +} + +#define __lasx_xvseqi_b(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvseqi_b((v32i8)(_1), (_2))) + +#define __lasx_xvseqi_h(/*__m256i*/ _1, /*si5*/ _2) \ + 
((__m256i)__builtin_lasx_xvseqi_h((v16i16)(_1), (_2))) + +#define __lasx_xvseqi_w(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvseqi_w((v8i32)(_1), (_2))) + +#define __lasx_xvseqi_d(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvseqi_d((v4i64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvslt_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvslt_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvslt_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvslt_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvslt_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvslt_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvslt_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvslt_d((v4i64)_1, (v4i64)_2); +} + +#define __lasx_xvslti_b(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvslti_b((v32i8)(_1), (_2))) + +#define __lasx_xvslti_h(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvslti_h((v16i16)(_1), (_2))) + +#define __lasx_xvslti_w(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvslti_w((v8i32)(_1), (_2))) + +#define __lasx_xvslti_d(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvslti_d((v4i64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvslt_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvslt_bu((v32u8)_1, (v32u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvslt_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvslt_hu((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvslt_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvslt_wu((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvslt_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvslt_du((v4u64)_1, (v4u64)_2); +} + +#define __lasx_xvslti_bu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvslti_bu((v32u8)(_1), (_2))) + +#define __lasx_xvslti_hu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvslti_hu((v16u16)(_1), (_2))) + +#define __lasx_xvslti_wu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvslti_wu((v8u32)(_1), (_2))) + +#define __lasx_xvslti_du(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvslti_du((v4u64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsle_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsle_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsle_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsle_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsle_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsle_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, 
__artificial__)) __m256i + __lasx_xvsle_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsle_d((v4i64)_1, (v4i64)_2); +} + +#define __lasx_xvslei_b(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvslei_b((v32i8)(_1), (_2))) + +#define __lasx_xvslei_h(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvslei_h((v16i16)(_1), (_2))) + +#define __lasx_xvslei_w(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvslei_w((v8i32)(_1), (_2))) + +#define __lasx_xvslei_d(/*__m256i*/ _1, /*si5*/ _2) \ + ((__m256i)__builtin_lasx_xvslei_d((v4i64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsle_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsle_bu((v32u8)_1, (v32u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsle_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsle_hu((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsle_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsle_wu((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsle_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsle_du((v4u64)_1, (v4u64)_2); +} + +#define __lasx_xvslei_bu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvslei_bu((v32u8)(_1), (_2))) + +#define __lasx_xvslei_hu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvslei_hu((v16u16)(_1), (_2))) + +#define __lasx_xvslei_wu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvslei_wu((v8u32)(_1), (_2))) + +#define __lasx_xvslei_du(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvslei_du((v4u64)(_1), (_2))) + +#define __lasx_xvsat_b(/*__m256i*/ _1, /*ui3*/ _2) \ + ((__m256i)__builtin_lasx_xvsat_b((v32i8)(_1), (_2))) + +#define __lasx_xvsat_h(/*__m256i*/ _1, /*ui4*/ _2) \ + ((__m256i)__builtin_lasx_xvsat_h((v16i16)(_1), (_2))) + +#define __lasx_xvsat_w(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvsat_w((v8i32)(_1), (_2))) + +#define __lasx_xvsat_d(/*__m256i*/ _1, /*ui6*/ _2) \ + ((__m256i)__builtin_lasx_xvsat_d((v4i64)(_1), (_2))) + +#define __lasx_xvsat_bu(/*__m256i*/ _1, /*ui3*/ _2) \ + ((__m256i)__builtin_lasx_xvsat_bu((v32u8)(_1), (_2))) + +#define __lasx_xvsat_hu(/*__m256i*/ _1, /*ui4*/ _2) \ + ((__m256i)__builtin_lasx_xvsat_hu((v16u16)(_1), (_2))) + +#define __lasx_xvsat_wu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvsat_wu((v8u32)(_1), (_2))) + +#define __lasx_xvsat_du(/*__m256i*/ _1, /*ui6*/ _2) \ + ((__m256i)__builtin_lasx_xvsat_du((v4u64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvadda_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvadda_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvadda_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvadda_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvadda_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvadda_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvadda_d(__m256i _1, __m256i _2) { + 
return (__m256i)__builtin_lasx_xvadda_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsadd_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsadd_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsadd_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsadd_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsadd_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsadd_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsadd_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsadd_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsadd_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsadd_bu((v32u8)_1, (v32u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsadd_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsadd_hu((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsadd_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsadd_wu((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsadd_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsadd_du((v4u64)_1, (v4u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvavg_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvavg_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvavg_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvavg_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvavg_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvavg_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvavg_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvavg_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvavg_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvavg_bu((v32u8)_1, (v32u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvavg_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvavg_hu((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvavg_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvavg_wu((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvavg_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvavg_du((v4u64)_1, (v4u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvavgr_b(__m256i _1, __m256i _2) { + return 
(__m256i)__builtin_lasx_xvavgr_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvavgr_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvavgr_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvavgr_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvavgr_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvavgr_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvavgr_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvavgr_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvavgr_bu((v32u8)_1, (v32u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvavgr_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvavgr_hu((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvavgr_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvavgr_wu((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvavgr_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvavgr_du((v4u64)_1, (v4u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssub_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssub_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssub_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssub_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssub_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssub_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssub_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssub_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssub_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssub_bu((v32u8)_1, (v32u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssub_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssub_hu((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssub_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssub_wu((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssub_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssub_du((v4u64)_1, (v4u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvabsd_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvabsd_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvabsd_h(__m256i _1, __m256i _2) { + 
return (__m256i)__builtin_lasx_xvabsd_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvabsd_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvabsd_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvabsd_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvabsd_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvabsd_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvabsd_bu((v32u8)_1, (v32u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvabsd_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvabsd_hu((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvabsd_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvabsd_wu((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvabsd_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvabsd_du((v4u64)_1, (v4u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmul_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmul_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmul_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmul_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmul_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmul_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmul_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmul_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmadd_b(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmadd_b((v32i8)_1, (v32i8)_2, (v32i8)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmadd_h(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmadd_h((v16i16)_1, (v16i16)_2, (v16i16)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmadd_w(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmadd_w((v8i32)_1, (v8i32)_2, (v8i32)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmadd_d(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmadd_d((v4i64)_1, (v4i64)_2, (v4i64)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmsub_b(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmsub_b((v32i8)_1, (v32i8)_2, (v32i8)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmsub_h(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmsub_h((v16i16)_1, (v16i16)_2, (v16i16)_3); +} + +extern 
__inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmsub_w(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmsub_w((v8i32)_1, (v8i32)_2, (v8i32)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmsub_d(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmsub_d((v4i64)_1, (v4i64)_2, (v4i64)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvdiv_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvdiv_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvdiv_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvdiv_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvdiv_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvdiv_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvdiv_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvdiv_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvdiv_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvdiv_bu((v32u8)_1, (v32u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvdiv_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvdiv_hu((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvdiv_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvdiv_wu((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvdiv_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvdiv_du((v4u64)_1, (v4u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvhaddw_h_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvhaddw_h_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvhaddw_w_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvhaddw_w_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvhaddw_d_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvhaddw_d_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvhaddw_hu_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvhaddw_hu_bu((v32u8)_1, (v32u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvhaddw_wu_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvhaddw_wu_hu((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvhaddw_du_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvhaddw_du_wu((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvhsubw_h_b(__m256i _1, __m256i _2) { 
+ return (__m256i)__builtin_lasx_xvhsubw_h_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvhsubw_w_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvhsubw_w_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvhsubw_d_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvhsubw_d_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvhsubw_hu_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvhsubw_hu_bu((v32u8)_1, (v32u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvhsubw_wu_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvhsubw_wu_hu((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvhsubw_du_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvhsubw_du_wu((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmod_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmod_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmod_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmod_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmod_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmod_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmod_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmod_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmod_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmod_bu((v32u8)_1, (v32u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmod_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmod_hu((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmod_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmod_wu((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmod_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmod_du((v4u64)_1, (v4u64)_2); +} + +#define __lasx_xvrepl128vei_b(/*__m256i*/ _1, /*ui4*/ _2) \ + ((__m256i)__builtin_lasx_xvrepl128vei_b((v32i8)(_1), (_2))) + +#define __lasx_xvrepl128vei_h(/*__m256i*/ _1, /*ui3*/ _2) \ + ((__m256i)__builtin_lasx_xvrepl128vei_h((v16i16)(_1), (_2))) + +#define __lasx_xvrepl128vei_w(/*__m256i*/ _1, /*ui2*/ _2) \ + ((__m256i)__builtin_lasx_xvrepl128vei_w((v8i32)(_1), (_2))) + +#define __lasx_xvrepl128vei_d(/*__m256i*/ _1, /*ui1*/ _2) \ + ((__m256i)__builtin_lasx_xvrepl128vei_d((v4i64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpickev_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvpickev_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + 
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpickev_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvpickev_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpickev_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvpickev_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpickev_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvpickev_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpickod_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvpickod_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpickod_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvpickod_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpickod_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvpickod_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpickod_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvpickod_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvilvh_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvilvh_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvilvh_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvilvh_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvilvh_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvilvh_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvilvh_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvilvh_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvilvl_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvilvl_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvilvl_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvilvl_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvilvl_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvilvl_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvilvl_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvilvl_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpackev_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvpackev_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpackev_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvpackev_h((v16i16)_1, 
(v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpackev_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvpackev_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpackev_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvpackev_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpackod_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvpackod_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpackod_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvpackod_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpackod_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvpackod_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpackod_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvpackod_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvshuf_b(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvshuf_b((v32i8)_1, (v32i8)_2, (v32i8)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvshuf_h(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvshuf_h((v16i16)_1, (v16i16)_2, (v16i16)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvshuf_w(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvshuf_w((v8i32)_1, (v8i32)_2, (v8i32)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvshuf_d(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvshuf_d((v4i64)_1, (v4i64)_2, (v4i64)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvand_v(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvand_v((v32u8)_1, (v32u8)_2); +} + +#define __lasx_xvandi_b(/*__m256i*/ _1, /*ui8*/ _2) \ + ((__m256i)__builtin_lasx_xvandi_b((v32u8)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvor_v(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvor_v((v32u8)_1, (v32u8)_2); +} + +#define __lasx_xvori_b(/*__m256i*/ _1, /*ui8*/ _2) \ + ((__m256i)__builtin_lasx_xvori_b((v32u8)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvnor_v(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvnor_v((v32u8)_1, (v32u8)_2); +} + +#define __lasx_xvnori_b(/*__m256i*/ _1, /*ui8*/ _2) \ + ((__m256i)__builtin_lasx_xvnori_b((v32u8)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvxor_v(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvxor_v((v32u8)_1, (v32u8)_2); +} + +#define __lasx_xvxori_b(/*__m256i*/ _1, /*ui8*/ _2) \ + ((__m256i)__builtin_lasx_xvxori_b((v32u8)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, 
__always_inline__, __artificial__)) __m256i + __lasx_xvbitsel_v(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvbitsel_v((v32u8)_1, (v32u8)_2, (v32u8)_3); +} + +#define __lasx_xvbitseli_b(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ + ((__m256i)__builtin_lasx_xvbitseli_b((v32u8)(_1), (v32u8)(_2), (_3))) + +#define __lasx_xvshuf4i_b(/*__m256i*/ _1, /*ui8*/ _2) \ + ((__m256i)__builtin_lasx_xvshuf4i_b((v32i8)(_1), (_2))) + +#define __lasx_xvshuf4i_h(/*__m256i*/ _1, /*ui8*/ _2) \ + ((__m256i)__builtin_lasx_xvshuf4i_h((v16i16)(_1), (_2))) + +#define __lasx_xvshuf4i_w(/*__m256i*/ _1, /*ui8*/ _2) \ + ((__m256i)__builtin_lasx_xvshuf4i_w((v8i32)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvreplgr2vr_b(int _1) { + return (__m256i)__builtin_lasx_xvreplgr2vr_b((int)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvreplgr2vr_h(int _1) { + return (__m256i)__builtin_lasx_xvreplgr2vr_h((int)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvreplgr2vr_w(int _1) { + return (__m256i)__builtin_lasx_xvreplgr2vr_w((int)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvreplgr2vr_d(long int _1) { + return (__m256i)__builtin_lasx_xvreplgr2vr_d((long int)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpcnt_b(__m256i _1) { + return (__m256i)__builtin_lasx_xvpcnt_b((v32i8)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpcnt_h(__m256i _1) { + return (__m256i)__builtin_lasx_xvpcnt_h((v16i16)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpcnt_w(__m256i _1) { + return (__m256i)__builtin_lasx_xvpcnt_w((v8i32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvpcnt_d(__m256i _1) { + return (__m256i)__builtin_lasx_xvpcnt_d((v4i64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvclo_b(__m256i _1) { + return (__m256i)__builtin_lasx_xvclo_b((v32i8)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvclo_h(__m256i _1) { + return (__m256i)__builtin_lasx_xvclo_h((v16i16)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvclo_w(__m256i _1) { + return (__m256i)__builtin_lasx_xvclo_w((v8i32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvclo_d(__m256i _1) { + return (__m256i)__builtin_lasx_xvclo_d((v4i64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvclz_b(__m256i _1) { + return (__m256i)__builtin_lasx_xvclz_b((v32i8)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvclz_h(__m256i _1) { + return (__m256i)__builtin_lasx_xvclz_h((v16i16)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvclz_w(__m256i _1) { + return (__m256i)__builtin_lasx_xvclz_w((v8i32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, 
__artificial__)) __m256i + __lasx_xvclz_d(__m256i _1) { + return (__m256i)__builtin_lasx_xvclz_d((v4i64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfadd_s(__m256 _1, __m256 _2) { + return (__m256)__builtin_lasx_xvfadd_s((v8f32)_1, (v8f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfadd_d(__m256d _1, __m256d _2) { + return (__m256d)__builtin_lasx_xvfadd_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfsub_s(__m256 _1, __m256 _2) { + return (__m256)__builtin_lasx_xvfsub_s((v8f32)_1, (v8f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfsub_d(__m256d _1, __m256d _2) { + return (__m256d)__builtin_lasx_xvfsub_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfmul_s(__m256 _1, __m256 _2) { + return (__m256)__builtin_lasx_xvfmul_s((v8f32)_1, (v8f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfmul_d(__m256d _1, __m256d _2) { + return (__m256d)__builtin_lasx_xvfmul_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfdiv_s(__m256 _1, __m256 _2) { + return (__m256)__builtin_lasx_xvfdiv_s((v8f32)_1, (v8f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfdiv_d(__m256d _1, __m256d _2) { + return (__m256d)__builtin_lasx_xvfdiv_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcvt_h_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcvt_h_s((v8f32)_1, (v8f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfcvt_s_d(__m256d _1, __m256d _2) { + return (__m256)__builtin_lasx_xvfcvt_s_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfmin_s(__m256 _1, __m256 _2) { + return (__m256)__builtin_lasx_xvfmin_s((v8f32)_1, (v8f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfmin_d(__m256d _1, __m256d _2) { + return (__m256d)__builtin_lasx_xvfmin_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfmina_s(__m256 _1, __m256 _2) { + return (__m256)__builtin_lasx_xvfmina_s((v8f32)_1, (v8f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfmina_d(__m256d _1, __m256d _2) { + return (__m256d)__builtin_lasx_xvfmina_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfmax_s(__m256 _1, __m256 _2) { + return (__m256)__builtin_lasx_xvfmax_s((v8f32)_1, (v8f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfmax_d(__m256d _1, __m256d _2) { + return (__m256d)__builtin_lasx_xvfmax_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfmaxa_s(__m256 _1, __m256 
_2) { + return (__m256)__builtin_lasx_xvfmaxa_s((v8f32)_1, (v8f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfmaxa_d(__m256d _1, __m256d _2) { + return (__m256d)__builtin_lasx_xvfmaxa_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfclass_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvfclass_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfclass_d(__m256d _1) { + return (__m256i)__builtin_lasx_xvfclass_d((v4f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfsqrt_s(__m256 _1) { + return (__m256)__builtin_lasx_xvfsqrt_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfsqrt_d(__m256d _1) { + return (__m256d)__builtin_lasx_xvfsqrt_d((v4f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfrecip_s(__m256 _1) { + return (__m256)__builtin_lasx_xvfrecip_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfrecip_d(__m256d _1) { + return (__m256d)__builtin_lasx_xvfrecip_d((v4f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfrint_s(__m256 _1) { + return (__m256)__builtin_lasx_xvfrint_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfrint_d(__m256d _1) { + return (__m256d)__builtin_lasx_xvfrint_d((v4f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfrsqrt_s(__m256 _1) { + return (__m256)__builtin_lasx_xvfrsqrt_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfrsqrt_d(__m256d _1) { + return (__m256d)__builtin_lasx_xvfrsqrt_d((v4f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvflogb_s(__m256 _1) { + return (__m256)__builtin_lasx_xvflogb_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvflogb_d(__m256d _1) { + return (__m256d)__builtin_lasx_xvflogb_d((v4f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfcvth_s_h(__m256i _1) { + return (__m256)__builtin_lasx_xvfcvth_s_h((v16i16)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfcvth_d_s(__m256 _1) { + return (__m256d)__builtin_lasx_xvfcvth_d_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfcvtl_s_h(__m256i _1) { + return (__m256)__builtin_lasx_xvfcvtl_s_h((v16i16)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfcvtl_d_s(__m256 _1) { + return (__m256d)__builtin_lasx_xvfcvtl_d_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftint_w_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftint_w_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, 
__artificial__)) __m256i + __lasx_xvftint_l_d(__m256d _1) { + return (__m256i)__builtin_lasx_xvftint_l_d((v4f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftint_wu_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftint_wu_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftint_lu_d(__m256d _1) { + return (__m256i)__builtin_lasx_xvftint_lu_d((v4f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrz_w_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftintrz_w_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrz_l_d(__m256d _1) { + return (__m256i)__builtin_lasx_xvftintrz_l_d((v4f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrz_wu_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftintrz_wu_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrz_lu_d(__m256d _1) { + return (__m256i)__builtin_lasx_xvftintrz_lu_d((v4f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvffint_s_w(__m256i _1) { + return (__m256)__builtin_lasx_xvffint_s_w((v8i32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvffint_d_l(__m256i _1) { + return (__m256d)__builtin_lasx_xvffint_d_l((v4i64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvffint_s_wu(__m256i _1) { + return (__m256)__builtin_lasx_xvffint_s_wu((v8u32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvffint_d_lu(__m256i _1) { + return (__m256d)__builtin_lasx_xvffint_d_lu((v4u64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvreplve_b(__m256i _1, int _2) { + return (__m256i)__builtin_lasx_xvreplve_b((v32i8)_1, (int)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvreplve_h(__m256i _1, int _2) { + return (__m256i)__builtin_lasx_xvreplve_h((v16i16)_1, (int)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvreplve_w(__m256i _1, int _2) { + return (__m256i)__builtin_lasx_xvreplve_w((v8i32)_1, (int)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvreplve_d(__m256i _1, int _2) { + return (__m256i)__builtin_lasx_xvreplve_d((v4i64)_1, (int)_2); +} + +#define __lasx_xvpermi_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ + ((__m256i)__builtin_lasx_xvpermi_w((v8i32)(_1), (v8i32)(_2), (_3))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvandn_v(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvandn_v((v32u8)_1, (v32u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvneg_b(__m256i _1) { + return (__m256i)__builtin_lasx_xvneg_b((v32i8)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvneg_h(__m256i _1) { + return 
(__m256i)__builtin_lasx_xvneg_h((v16i16)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvneg_w(__m256i _1) { + return (__m256i)__builtin_lasx_xvneg_w((v8i32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvneg_d(__m256i _1) { + return (__m256i)__builtin_lasx_xvneg_d((v4i64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmuh_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmuh_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmuh_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmuh_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmuh_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmuh_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmuh_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmuh_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmuh_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmuh_bu((v32u8)_1, (v32u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmuh_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmuh_hu((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmuh_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmuh_wu((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmuh_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmuh_du((v4u64)_1, (v4u64)_2); +} + +#define __lasx_xvsllwil_h_b(/*__m256i*/ _1, /*ui3*/ _2) \ + ((__m256i)__builtin_lasx_xvsllwil_h_b((v32i8)(_1), (_2))) + +#define __lasx_xvsllwil_w_h(/*__m256i*/ _1, /*ui4*/ _2) \ + ((__m256i)__builtin_lasx_xvsllwil_w_h((v16i16)(_1), (_2))) + +#define __lasx_xvsllwil_d_w(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvsllwil_d_w((v8i32)(_1), (_2))) + +#define __lasx_xvsllwil_hu_bu(/*__m256i*/ _1, /*ui3*/ _2) \ + ((__m256i)__builtin_lasx_xvsllwil_hu_bu((v32u8)(_1), (_2))) + +#define __lasx_xvsllwil_wu_hu(/*__m256i*/ _1, /*ui4*/ _2) \ + ((__m256i)__builtin_lasx_xvsllwil_wu_hu((v16u16)(_1), (_2))) + +#define __lasx_xvsllwil_du_wu(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvsllwil_du_wu((v8u32)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsran_b_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsran_b_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsran_h_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsran_h_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsran_w_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsran_w_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + 
__lasx_xvssran_b_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssran_b_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssran_h_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssran_h_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssran_w_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssran_w_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssran_bu_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssran_bu_h((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssran_hu_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssran_hu_w((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssran_wu_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssran_wu_d((v4u64)_1, (v4u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrarn_b_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrarn_b_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrarn_h_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrarn_h_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrarn_w_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrarn_w_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrarn_b_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrarn_b_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrarn_h_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrarn_h_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrarn_w_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrarn_w_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrarn_bu_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrarn_bu_h((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrarn_hu_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrarn_hu_w((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrarn_wu_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrarn_wu_d((v4u64)_1, (v4u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrln_b_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrln_b_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrln_h_w(__m256i _1, __m256i _2) { + return 
(__m256i)__builtin_lasx_xvsrln_h_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrln_w_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrln_w_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrln_bu_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrln_bu_h((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrln_hu_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrln_hu_w((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrln_wu_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrln_wu_d((v4u64)_1, (v4u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrlrn_b_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrlrn_b_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrlrn_h_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrlrn_h_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsrlrn_w_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsrlrn_w_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrlrn_bu_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrlrn_bu_h((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrlrn_hu_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrlrn_hu_w((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrlrn_wu_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrlrn_wu_d((v4u64)_1, (v4u64)_2); +} + +#define __lasx_xvfrstpi_b(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ + ((__m256i)__builtin_lasx_xvfrstpi_b((v32i8)(_1), (v32i8)(_2), (_3))) + +#define __lasx_xvfrstpi_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ + ((__m256i)__builtin_lasx_xvfrstpi_h((v16i16)(_1), (v16i16)(_2), (_3))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfrstp_b(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvfrstp_b((v32i8)_1, (v32i8)_2, (v32i8)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfrstp_h(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvfrstp_h((v16i16)_1, (v16i16)_2, (v16i16)_3); +} + +#define __lasx_xvshuf4i_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ + ((__m256i)__builtin_lasx_xvshuf4i_d((v4i64)(_1), (v4i64)(_2), (_3))) + +#define __lasx_xvbsrl_v(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvbsrl_v((v32i8)(_1), (_2))) + +#define __lasx_xvbsll_v(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvbsll_v((v32i8)(_1), (_2))) + +#define __lasx_xvextrins_b(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ + ((__m256i)__builtin_lasx_xvextrins_b((v32i8)(_1), (v32i8)(_2), (_3))) + +#define __lasx_xvextrins_h(/*__m256i*/ _1, 
/*__m256i*/ _2, /*ui8*/ _3) \ + ((__m256i)__builtin_lasx_xvextrins_h((v16i16)(_1), (v16i16)(_2), (_3))) + +#define __lasx_xvextrins_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ + ((__m256i)__builtin_lasx_xvextrins_w((v8i32)(_1), (v8i32)(_2), (_3))) + +#define __lasx_xvextrins_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ + ((__m256i)__builtin_lasx_xvextrins_d((v4i64)(_1), (v4i64)(_2), (_3))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmskltz_b(__m256i _1) { + return (__m256i)__builtin_lasx_xvmskltz_b((v32i8)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmskltz_h(__m256i _1) { + return (__m256i)__builtin_lasx_xvmskltz_h((v16i16)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmskltz_w(__m256i _1) { + return (__m256i)__builtin_lasx_xvmskltz_w((v8i32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmskltz_d(__m256i _1) { + return (__m256i)__builtin_lasx_xvmskltz_d((v4i64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsigncov_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsigncov_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsigncov_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsigncov_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsigncov_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsigncov_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsigncov_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsigncov_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfmadd_s(__m256 _1, __m256 _2, __m256 _3) { + return (__m256)__builtin_lasx_xvfmadd_s((v8f32)_1, (v8f32)_2, (v8f32)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfmadd_d(__m256d _1, __m256d _2, __m256d _3) { + return (__m256d)__builtin_lasx_xvfmadd_d((v4f64)_1, (v4f64)_2, (v4f64)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfmsub_s(__m256 _1, __m256 _2, __m256 _3) { + return (__m256)__builtin_lasx_xvfmsub_s((v8f32)_1, (v8f32)_2, (v8f32)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfmsub_d(__m256d _1, __m256d _2, __m256d _3) { + return (__m256d)__builtin_lasx_xvfmsub_d((v4f64)_1, (v4f64)_2, (v4f64)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfnmadd_s(__m256 _1, __m256 _2, __m256 _3) { + return (__m256)__builtin_lasx_xvfnmadd_s((v8f32)_1, (v8f32)_2, (v8f32)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfnmadd_d(__m256d _1, __m256d _2, __m256d _3) { + return (__m256d)__builtin_lasx_xvfnmadd_d((v4f64)_1, (v4f64)_2, (v4f64)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfnmsub_s(__m256 _1, __m256 _2, __m256 _3) { + 
return (__m256)__builtin_lasx_xvfnmsub_s((v8f32)_1, (v8f32)_2, (v8f32)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfnmsub_d(__m256d _1, __m256d _2, __m256d _3) { + return (__m256d)__builtin_lasx_xvfnmsub_d((v4f64)_1, (v4f64)_2, (v4f64)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrne_w_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftintrne_w_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrne_l_d(__m256d _1) { + return (__m256i)__builtin_lasx_xvftintrne_l_d((v4f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrp_w_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftintrp_w_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrp_l_d(__m256d _1) { + return (__m256i)__builtin_lasx_xvftintrp_l_d((v4f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrm_w_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftintrm_w_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrm_l_d(__m256d _1) { + return (__m256i)__builtin_lasx_xvftintrm_l_d((v4f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftint_w_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvftint_w_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvffint_s_l(__m256i _1, __m256i _2) { + return (__m256)__builtin_lasx_xvffint_s_l((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrz_w_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvftintrz_w_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrp_w_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvftintrp_w_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrm_w_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvftintrm_w_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrne_w_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvftintrne_w_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftinth_l_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftinth_l_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintl_l_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftintl_l_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvffinth_d_w(__m256i _1) { + return (__m256d)__builtin_lasx_xvffinth_d_w((v8i32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvffintl_d_w(__m256i _1) { + return (__m256d)__builtin_lasx_xvffintl_d_w((v8i32)_1); +} 
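+
+/* Illustrative usage sketch, not part of the intrinsic set itself: the
+   wrappers above compose like ordinary C functions.  Assuming an
+   LASX-enabled target, a hypothetical helper `axpy` computing an
+   elementwise a * x + y over eight packed floats could be written as
+
+     static __m256 axpy(__m256 a, __m256 x, __m256 y) {
+       return __lasx_xvfmadd_s(a, x, y);
+     }
+
+   and its result converted to packed int32 with round-to-nearest-even
+   via __lasx_xvftintrne_w_s(axpy(a, x, y)).  */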
+ +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrzh_l_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftintrzh_l_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrzl_l_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftintrzl_l_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrph_l_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftintrph_l_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrpl_l_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftintrpl_l_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrmh_l_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftintrmh_l_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrml_l_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftintrml_l_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrneh_l_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftintrneh_l_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvftintrnel_l_s(__m256 _1) { + return (__m256i)__builtin_lasx_xvftintrnel_l_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfrintrne_s(__m256 _1) { + return (__m256)__builtin_lasx_xvfrintrne_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfrintrne_d(__m256d _1) { + return (__m256d)__builtin_lasx_xvfrintrne_d((v4f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfrintrz_s(__m256 _1) { + return (__m256)__builtin_lasx_xvfrintrz_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfrintrz_d(__m256d _1) { + return (__m256d)__builtin_lasx_xvfrintrz_d((v4f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfrintrp_s(__m256 _1) { + return (__m256)__builtin_lasx_xvfrintrp_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfrintrp_d(__m256d _1) { + return (__m256d)__builtin_lasx_xvfrintrp_d((v4f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfrintrm_s(__m256 _1) { + return (__m256)__builtin_lasx_xvfrintrm_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfrintrm_d(__m256d _1) { + return (__m256d)__builtin_lasx_xvfrintrm_d((v4f64)_1); +} + +#define __lasx_xvld(/*void **/ _1, /*si12*/ _2) \ + ((__m256i)__builtin_lasx_xvld((void const *)(_1), (_2))) + +#define __lasx_xvst(/*__m256i*/ _1, /*void **/ _2, /*si12*/ _3) \ + ((void)__builtin_lasx_xvst((v32i8)(_1), (void *)(_2), (_3))) + +#define __lasx_xvstelm_b(/*__m256i*/ _1, /*void **/ _2, /*si8*/ _3, \ + /*idx*/ _4) \ + ((void)__builtin_lasx_xvstelm_b((v32i8)(_1), (void *)(_2), (_3), (_4))) + +#define __lasx_xvstelm_h(/*__m256i*/ _1, /*void **/ 
_2, /*si8*/ _3, \ + /*idx*/ _4) \ + ((void)__builtin_lasx_xvstelm_h((v16i16)(_1), (void *)(_2), (_3), (_4))) + +#define __lasx_xvstelm_w(/*__m256i*/ _1, /*void **/ _2, /*si8*/ _3, \ + /*idx*/ _4) \ + ((void)__builtin_lasx_xvstelm_w((v8i32)(_1), (void *)(_2), (_3), (_4))) + +#define __lasx_xvstelm_d(/*__m256i*/ _1, /*void **/ _2, /*si8*/ _3, \ + /*idx*/ _4) \ + ((void)__builtin_lasx_xvstelm_d((v4i64)(_1), (void *)(_2), (_3), (_4))) + +#define __lasx_xvinsve0_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui3*/ _3) \ + ((__m256i)__builtin_lasx_xvinsve0_w((v8i32)(_1), (v8i32)(_2), (_3))) + +#define __lasx_xvinsve0_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui2*/ _3) \ + ((__m256i)__builtin_lasx_xvinsve0_d((v4i64)(_1), (v4i64)(_2), (_3))) + +#define __lasx_xvpickve_w(/*__m256i*/ _1, /*ui3*/ _2) \ + ((__m256i)__builtin_lasx_xvpickve_w((v8i32)(_1), (_2))) + +#define __lasx_xvpickve_d(/*__m256i*/ _1, /*ui2*/ _2) \ + ((__m256i)__builtin_lasx_xvpickve_d((v4i64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrlrn_b_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrlrn_b_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrlrn_h_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrlrn_h_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrlrn_w_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrlrn_w_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrln_b_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrln_b_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrln_h_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrln_h_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvssrln_w_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvssrln_w_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvorn_v(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvorn_v((v32i8)_1, (v32i8)_2); +} + +#define __lasx_xvldi(/*i13*/ _1) ((__m256i)__builtin_lasx_xvldi((_1))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvldx(void const *_1, long int _2) { + return (__m256i)__builtin_lasx_xvldx((void const *)_1, (long int)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) void + __lasx_xvstx(__m256i _1, void *_2, long int _3) { + return (void)__builtin_lasx_xvstx((v32i8)_1, (void *)_2, (long int)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvextl_qu_du(__m256i _1) { + return (__m256i)__builtin_lasx_xvextl_qu_du((v4u64)_1); +} + +#define __lasx_xvinsgr2vr_w(/*__m256i*/ _1, /*int*/ _2, /*ui3*/ _3) \ + ((__m256i)__builtin_lasx_xvinsgr2vr_w((v8i32)(_1), (int)(_2), (_3))) + +#define __lasx_xvinsgr2vr_d(/*__m256i*/ _1, /*long int*/ _2, /*ui2*/ _3) \ + ((__m256i)__builtin_lasx_xvinsgr2vr_d((v4i64)(_1), (long int)(_2), (_3))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 
__m256i + __lasx_xvreplve0_b(__m256i _1) { + return (__m256i)__builtin_lasx_xvreplve0_b((v32i8)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvreplve0_h(__m256i _1) { + return (__m256i)__builtin_lasx_xvreplve0_h((v16i16)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvreplve0_w(__m256i _1) { + return (__m256i)__builtin_lasx_xvreplve0_w((v8i32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvreplve0_d(__m256i _1) { + return (__m256i)__builtin_lasx_xvreplve0_d((v4i64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvreplve0_q(__m256i _1) { + return (__m256i)__builtin_lasx_xvreplve0_q((v32i8)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_vext2xv_h_b(__m256i _1) { + return (__m256i)__builtin_lasx_vext2xv_h_b((v32i8)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_vext2xv_w_h(__m256i _1) { + return (__m256i)__builtin_lasx_vext2xv_w_h((v16i16)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_vext2xv_d_w(__m256i _1) { + return (__m256i)__builtin_lasx_vext2xv_d_w((v8i32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_vext2xv_w_b(__m256i _1) { + return (__m256i)__builtin_lasx_vext2xv_w_b((v32i8)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_vext2xv_d_h(__m256i _1) { + return (__m256i)__builtin_lasx_vext2xv_d_h((v16i16)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_vext2xv_d_b(__m256i _1) { + return (__m256i)__builtin_lasx_vext2xv_d_b((v32i8)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_vext2xv_hu_bu(__m256i _1) { + return (__m256i)__builtin_lasx_vext2xv_hu_bu((v32i8)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_vext2xv_wu_hu(__m256i _1) { + return (__m256i)__builtin_lasx_vext2xv_wu_hu((v16i16)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_vext2xv_du_wu(__m256i _1) { + return (__m256i)__builtin_lasx_vext2xv_du_wu((v8i32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_vext2xv_wu_bu(__m256i _1) { + return (__m256i)__builtin_lasx_vext2xv_wu_bu((v32i8)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_vext2xv_du_hu(__m256i _1) { + return (__m256i)__builtin_lasx_vext2xv_du_hu((v16i16)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_vext2xv_du_bu(__m256i _1) { + return (__m256i)__builtin_lasx_vext2xv_du_bu((v32i8)_1); +} + +#define __lasx_xvpermi_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ + ((__m256i)__builtin_lasx_xvpermi_q((v32i8)(_1), (v32i8)(_2), (_3))) + +#define __lasx_xvpermi_d(/*__m256i*/ _1, /*ui8*/ _2) \ + ((__m256i)__builtin_lasx_xvpermi_d((v4i64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + 
__lasx_xvperm_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvperm_w((v8i32)_1, (v8i32)_2); +} + +#define __lasx_xvldrepl_b(/*void **/ _1, /*si12*/ _2) \ + ((__m256i)__builtin_lasx_xvldrepl_b((void const *)(_1), (_2))) + +#define __lasx_xvldrepl_h(/*void **/ _1, /*si11*/ _2) \ + ((__m256i)__builtin_lasx_xvldrepl_h((void const *)(_1), (_2))) + +#define __lasx_xvldrepl_w(/*void **/ _1, /*si10*/ _2) \ + ((__m256i)__builtin_lasx_xvldrepl_w((void const *)(_1), (_2))) + +#define __lasx_xvldrepl_d(/*void **/ _1, /*si9*/ _2) \ + ((__m256i)__builtin_lasx_xvldrepl_d((void const *)(_1), (_2))) + +#define __lasx_xvpickve2gr_w(/*__m256i*/ _1, /*ui3*/ _2) \ + ((int)__builtin_lasx_xvpickve2gr_w((v8i32)(_1), (_2))) + +#define __lasx_xvpickve2gr_wu(/*__m256i*/ _1, /*ui3*/ _2) \ + ((unsigned int)__builtin_lasx_xvpickve2gr_wu((v8i32)(_1), (_2))) + +#define __lasx_xvpickve2gr_d(/*__m256i*/ _1, /*ui2*/ _2) \ + ((long int)__builtin_lasx_xvpickve2gr_d((v4i64)(_1), (_2))) + +#define __lasx_xvpickve2gr_du(/*__m256i*/ _1, /*ui2*/ _2) \ + ((unsigned long int)__builtin_lasx_xvpickve2gr_du((v4i64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwev_q_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwev_q_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwev_d_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwev_d_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwev_w_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwev_w_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwev_h_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwev_h_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwev_q_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwev_q_du((v4u64)_1, (v4u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwev_d_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwev_d_wu((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwev_w_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwev_w_hu((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwev_h_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwev_h_bu((v32u8)_1, (v32u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsubwev_q_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsubwev_q_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsubwev_d_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsubwev_d_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsubwev_w_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsubwev_w_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, 
__always_inline__, __artificial__)) __m256i + __lasx_xvsubwev_h_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsubwev_h_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsubwev_q_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsubwev_q_du((v4u64)_1, (v4u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsubwev_d_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsubwev_d_wu((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsubwev_w_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsubwev_w_hu((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsubwev_h_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsubwev_h_bu((v32u8)_1, (v32u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwev_q_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwev_q_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwev_d_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwev_d_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwev_w_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwev_w_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwev_h_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwev_h_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwev_q_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwev_q_du((v4u64)_1, (v4u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwev_d_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwev_d_wu((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwev_w_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwev_w_hu((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwev_h_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwev_h_bu((v32u8)_1, (v32u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwod_q_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwod_q_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwod_d_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwod_d_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwod_w_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwod_w_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwod_h_b(__m256i 
_1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwod_h_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwod_q_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwod_q_du((v4u64)_1, (v4u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwod_d_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwod_d_wu((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwod_w_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwod_w_hu((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwod_h_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwod_h_bu((v32u8)_1, (v32u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsubwod_q_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsubwod_q_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsubwod_d_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsubwod_d_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsubwod_w_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsubwod_w_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsubwod_h_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsubwod_h_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsubwod_q_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsubwod_q_du((v4u64)_1, (v4u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsubwod_d_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsubwod_d_wu((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsubwod_w_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsubwod_w_hu((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsubwod_h_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsubwod_h_bu((v32u8)_1, (v32u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwod_q_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwod_q_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwod_d_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwod_d_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwod_w_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwod_w_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwod_h_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwod_h_b((v32i8)_1, 
(v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwod_q_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwod_q_du((v4u64)_1, (v4u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwod_d_wu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwod_d_wu((v8u32)_1, (v8u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwod_w_hu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwod_w_hu((v16u16)_1, (v16u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwod_h_bu(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwod_h_bu((v32u8)_1, (v32u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwev_d_wu_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwev_d_wu_w((v8u32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwev_w_hu_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwev_w_hu_h((v16u16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwev_h_bu_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwev_h_bu_b((v32u8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwev_d_wu_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwev_d_wu_w((v8u32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwev_w_hu_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwev_w_hu_h((v16u16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwev_h_bu_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwev_h_bu_b((v32u8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwod_d_wu_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwod_d_wu_w((v8u32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwod_w_hu_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwod_w_hu_h((v16u16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwod_h_bu_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwod_h_bu_b((v32u8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwod_d_wu_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwod_d_wu_w((v8u32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwod_w_hu_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwod_w_hu_h((v16u16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwod_h_bu_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwod_h_bu_b((v32u8)_1, 
(v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvhaddw_q_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvhaddw_q_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvhaddw_qu_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvhaddw_qu_du((v4u64)_1, (v4u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvhsubw_q_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvhsubw_q_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvhsubw_qu_du(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvhsubw_qu_du((v4u64)_1, (v4u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwev_q_d(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmaddwev_q_d((v4i64)_1, (v4i64)_2, (v4i64)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwev_d_w(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmaddwev_d_w((v4i64)_1, (v8i32)_2, (v8i32)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwev_w_h(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmaddwev_w_h((v8i32)_1, (v16i16)_2, + (v16i16)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwev_h_b(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmaddwev_h_b((v16i16)_1, (v32i8)_2, + (v32i8)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwev_q_du(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmaddwev_q_du((v4u64)_1, (v4u64)_2, + (v4u64)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwev_d_wu(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmaddwev_d_wu((v4u64)_1, (v8u32)_2, + (v8u32)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwev_w_hu(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmaddwev_w_hu((v8u32)_1, (v16u16)_2, + (v16u16)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwev_h_bu(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmaddwev_h_bu((v16u16)_1, (v32u8)_2, + (v32u8)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwod_q_d(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmaddwod_q_d((v4i64)_1, (v4i64)_2, (v4i64)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwod_d_w(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmaddwod_d_w((v4i64)_1, (v8i32)_2, (v8i32)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwod_w_h(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmaddwod_w_h((v8i32)_1, (v16i16)_2, + 
(v16i16)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwod_h_b(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmaddwod_h_b((v16i16)_1, (v32i8)_2, + (v32i8)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwod_q_du(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmaddwod_q_du((v4u64)_1, (v4u64)_2, + (v4u64)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwod_d_wu(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmaddwod_d_wu((v4u64)_1, (v8u32)_2, + (v8u32)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwod_w_hu(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmaddwod_w_hu((v8u32)_1, (v16u16)_2, + (v16u16)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwod_h_bu(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmaddwod_h_bu((v16u16)_1, (v32u8)_2, + (v32u8)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwev_q_du_d(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmaddwev_q_du_d((v4i64)_1, (v4u64)_2, + (v4i64)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwev_d_wu_w(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmaddwev_d_wu_w((v4i64)_1, (v8u32)_2, + (v8i32)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwev_w_hu_h(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmaddwev_w_hu_h((v8i32)_1, (v16u16)_2, + (v16i16)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwev_h_bu_b(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmaddwev_h_bu_b((v16i16)_1, (v32u8)_2, + (v32i8)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwod_q_du_d(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmaddwod_q_du_d((v4i64)_1, (v4u64)_2, + (v4i64)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwod_d_wu_w(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmaddwod_d_wu_w((v4i64)_1, (v8u32)_2, + (v8i32)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwod_w_hu_h(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmaddwod_w_hu_h((v8i32)_1, (v16u16)_2, + (v16i16)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmaddwod_h_bu_b(__m256i _1, __m256i _2, __m256i _3) { + return (__m256i)__builtin_lasx_xvmaddwod_h_bu_b((v16i16)_1, (v32u8)_2, + (v32i8)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvrotr_b(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvrotr_b((v32i8)_1, (v32i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + 
__lasx_xvrotr_h(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvrotr_h((v16i16)_1, (v16i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvrotr_w(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvrotr_w((v8i32)_1, (v8i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvrotr_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvrotr_d((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvadd_q(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvadd_q((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvsub_q(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvsub_q((v4i64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwev_q_du_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwev_q_du_d((v4u64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvaddwod_q_du_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvaddwod_q_du_d((v4u64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwev_q_du_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwev_q_du_d((v4u64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmulwod_q_du_d(__m256i _1, __m256i _2) { + return (__m256i)__builtin_lasx_xvmulwod_q_du_d((v4u64)_1, (v4i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmskgez_b(__m256i _1) { + return (__m256i)__builtin_lasx_xvmskgez_b((v32i8)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvmsknz_b(__m256i _1) { + return (__m256i)__builtin_lasx_xvmsknz_b((v32i8)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvexth_h_b(__m256i _1) { + return (__m256i)__builtin_lasx_xvexth_h_b((v32i8)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvexth_w_h(__m256i _1) { + return (__m256i)__builtin_lasx_xvexth_w_h((v16i16)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvexth_d_w(__m256i _1) { + return (__m256i)__builtin_lasx_xvexth_d_w((v8i32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvexth_q_d(__m256i _1) { + return (__m256i)__builtin_lasx_xvexth_q_d((v4i64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvexth_hu_bu(__m256i _1) { + return (__m256i)__builtin_lasx_xvexth_hu_bu((v32u8)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvexth_wu_hu(__m256i _1) { + return (__m256i)__builtin_lasx_xvexth_wu_hu((v16u16)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvexth_du_wu(__m256i _1) { + return (__m256i)__builtin_lasx_xvexth_du_wu((v8u32)_1); +} + +extern 
__inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvexth_qu_du(__m256i _1) { + return (__m256i)__builtin_lasx_xvexth_qu_du((v4u64)_1); +} + +#define __lasx_xvrotri_b(/*__m256i*/ _1, /*ui3*/ _2) \ + ((__m256i)__builtin_lasx_xvrotri_b((v32i8)(_1), (_2))) + +#define __lasx_xvrotri_h(/*__m256i*/ _1, /*ui4*/ _2) \ + ((__m256i)__builtin_lasx_xvrotri_h((v16i16)(_1), (_2))) + +#define __lasx_xvrotri_w(/*__m256i*/ _1, /*ui5*/ _2) \ + ((__m256i)__builtin_lasx_xvrotri_w((v8i32)(_1), (_2))) + +#define __lasx_xvrotri_d(/*__m256i*/ _1, /*ui6*/ _2) \ + ((__m256i)__builtin_lasx_xvrotri_d((v4i64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvextl_q_d(__m256i _1) { + return (__m256i)__builtin_lasx_xvextl_q_d((v4i64)_1); +} + +#define __lasx_xvsrlni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ + ((__m256i)__builtin_lasx_xvsrlni_b_h((v32i8)(_1), (v32i8)(_2), (_3))) + +#define __lasx_xvsrlni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ + ((__m256i)__builtin_lasx_xvsrlni_h_w((v16i16)(_1), (v16i16)(_2), (_3))) + +#define __lasx_xvsrlni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ + ((__m256i)__builtin_lasx_xvsrlni_w_d((v8i32)(_1), (v8i32)(_2), (_3))) + +#define __lasx_xvsrlni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ + ((__m256i)__builtin_lasx_xvsrlni_d_q((v4i64)(_1), (v4i64)(_2), (_3))) + +#define __lasx_xvsrlrni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ + ((__m256i)__builtin_lasx_xvsrlrni_b_h((v32i8)(_1), (v32i8)(_2), (_3))) + +#define __lasx_xvsrlrni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ + ((__m256i)__builtin_lasx_xvsrlrni_h_w((v16i16)(_1), (v16i16)(_2), (_3))) + +#define __lasx_xvsrlrni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ + ((__m256i)__builtin_lasx_xvsrlrni_w_d((v8i32)(_1), (v8i32)(_2), (_3))) + +#define __lasx_xvsrlrni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ + ((__m256i)__builtin_lasx_xvsrlrni_d_q((v4i64)(_1), (v4i64)(_2), (_3))) + +#define __lasx_xvssrlni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ + ((__m256i)__builtin_lasx_xvssrlni_b_h((v32i8)(_1), (v32i8)(_2), (_3))) + +#define __lasx_xvssrlni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ + ((__m256i)__builtin_lasx_xvssrlni_h_w((v16i16)(_1), (v16i16)(_2), (_3))) + +#define __lasx_xvssrlni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ + ((__m256i)__builtin_lasx_xvssrlni_w_d((v8i32)(_1), (v8i32)(_2), (_3))) + +#define __lasx_xvssrlni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ + ((__m256i)__builtin_lasx_xvssrlni_d_q((v4i64)(_1), (v4i64)(_2), (_3))) + +#define __lasx_xvssrlni_bu_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ + ((__m256i)__builtin_lasx_xvssrlni_bu_h((v32u8)(_1), (v32i8)(_2), (_3))) + +#define __lasx_xvssrlni_hu_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ + ((__m256i)__builtin_lasx_xvssrlni_hu_w((v16u16)(_1), (v16i16)(_2), (_3))) + +#define __lasx_xvssrlni_wu_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ + ((__m256i)__builtin_lasx_xvssrlni_wu_d((v8u32)(_1), (v8i32)(_2), (_3))) + +#define __lasx_xvssrlni_du_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ + ((__m256i)__builtin_lasx_xvssrlni_du_q((v4u64)(_1), (v4i64)(_2), (_3))) + +#define __lasx_xvssrlrni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ + ((__m256i)__builtin_lasx_xvssrlrni_b_h((v32i8)(_1), (v32i8)(_2), (_3))) + +#define __lasx_xvssrlrni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ + ((__m256i)__builtin_lasx_xvssrlrni_h_w((v16i16)(_1), (v16i16)(_2), (_3))) + 
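+/* Illustrative sketch, not part of the generated header: the _ni_
+ * shift-right-and-narrow-insert intrinsics in this group encode the
+ * shift amount as an immediate (ui4..ui7 in the parameter comments),
+ * which is why they are macros rather than inline functions: the count
+ * must be an integer constant expression.  A hypothetical use:
+ *
+ *   __m256i narrow(__m256i hi, __m256i lo) {
+ *     return __lasx_xvssrlrni_h_w(hi, lo, 3);  // ui5 immediate: 0..31
+ *   }
+ *
+ * Passing a runtime variable as the count is rejected at compile time.
+ */
+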
+#define __lasx_xvssrlrni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ + ((__m256i)__builtin_lasx_xvssrlrni_w_d((v8i32)(_1), (v8i32)(_2), (_3))) + +#define __lasx_xvssrlrni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ + ((__m256i)__builtin_lasx_xvssrlrni_d_q((v4i64)(_1), (v4i64)(_2), (_3))) + +#define __lasx_xvssrlrni_bu_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ + ((__m256i)__builtin_lasx_xvssrlrni_bu_h((v32u8)(_1), (v32i8)(_2), (_3))) + +#define __lasx_xvssrlrni_hu_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ + ((__m256i)__builtin_lasx_xvssrlrni_hu_w((v16u16)(_1), (v16i16)(_2), (_3))) + +#define __lasx_xvssrlrni_wu_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ + ((__m256i)__builtin_lasx_xvssrlrni_wu_d((v8u32)(_1), (v8i32)(_2), (_3))) + +#define __lasx_xvssrlrni_du_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ + ((__m256i)__builtin_lasx_xvssrlrni_du_q((v4u64)(_1), (v4i64)(_2), (_3))) + +#define __lasx_xvsrani_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ + ((__m256i)__builtin_lasx_xvsrani_b_h((v32i8)(_1), (v32i8)(_2), (_3))) + +#define __lasx_xvsrani_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ + ((__m256i)__builtin_lasx_xvsrani_h_w((v16i16)(_1), (v16i16)(_2), (_3))) + +#define __lasx_xvsrani_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ + ((__m256i)__builtin_lasx_xvsrani_w_d((v8i32)(_1), (v8i32)(_2), (_3))) + +#define __lasx_xvsrani_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ + ((__m256i)__builtin_lasx_xvsrani_d_q((v4i64)(_1), (v4i64)(_2), (_3))) + +#define __lasx_xvsrarni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ + ((__m256i)__builtin_lasx_xvsrarni_b_h((v32i8)(_1), (v32i8)(_2), (_3))) + +#define __lasx_xvsrarni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ + ((__m256i)__builtin_lasx_xvsrarni_h_w((v16i16)(_1), (v16i16)(_2), (_3))) + +#define __lasx_xvsrarni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ + ((__m256i)__builtin_lasx_xvsrarni_w_d((v8i32)(_1), (v8i32)(_2), (_3))) + +#define __lasx_xvsrarni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ + ((__m256i)__builtin_lasx_xvsrarni_d_q((v4i64)(_1), (v4i64)(_2), (_3))) + +#define __lasx_xvssrani_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ + ((__m256i)__builtin_lasx_xvssrani_b_h((v32i8)(_1), (v32i8)(_2), (_3))) + +#define __lasx_xvssrani_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ + ((__m256i)__builtin_lasx_xvssrani_h_w((v16i16)(_1), (v16i16)(_2), (_3))) + +#define __lasx_xvssrani_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ + ((__m256i)__builtin_lasx_xvssrani_w_d((v8i32)(_1), (v8i32)(_2), (_3))) + +#define __lasx_xvssrani_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ + ((__m256i)__builtin_lasx_xvssrani_d_q((v4i64)(_1), (v4i64)(_2), (_3))) + +#define __lasx_xvssrani_bu_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ + ((__m256i)__builtin_lasx_xvssrani_bu_h((v32u8)(_1), (v32i8)(_2), (_3))) + +#define __lasx_xvssrani_hu_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ + ((__m256i)__builtin_lasx_xvssrani_hu_w((v16u16)(_1), (v16i16)(_2), (_3))) + +#define __lasx_xvssrani_wu_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ + ((__m256i)__builtin_lasx_xvssrani_wu_d((v8u32)(_1), (v8i32)(_2), (_3))) + +#define __lasx_xvssrani_du_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ + ((__m256i)__builtin_lasx_xvssrani_du_q((v4u64)(_1), (v4i64)(_2), (_3))) + +#define __lasx_xvssrarni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ + ((__m256i)__builtin_lasx_xvssrarni_b_h((v32i8)(_1), (v32i8)(_2), (_3))) + +#define __lasx_xvssrarni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ 
_3) \ + ((__m256i)__builtin_lasx_xvssrarni_h_w((v16i16)(_1), (v16i16)(_2), (_3))) + +#define __lasx_xvssrarni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ + ((__m256i)__builtin_lasx_xvssrarni_w_d((v8i32)(_1), (v8i32)(_2), (_3))) + +#define __lasx_xvssrarni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ + ((__m256i)__builtin_lasx_xvssrarni_d_q((v4i64)(_1), (v4i64)(_2), (_3))) + +#define __lasx_xvssrarni_bu_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ + ((__m256i)__builtin_lasx_xvssrarni_bu_h((v32u8)(_1), (v32i8)(_2), (_3))) + +#define __lasx_xvssrarni_hu_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ + ((__m256i)__builtin_lasx_xvssrarni_hu_w((v16u16)(_1), (v16i16)(_2), (_3))) + +#define __lasx_xvssrarni_wu_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ + ((__m256i)__builtin_lasx_xvssrarni_wu_d((v8u32)(_1), (v8i32)(_2), (_3))) + +#define __lasx_xvssrarni_du_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ + ((__m256i)__builtin_lasx_xvssrarni_du_q((v4u64)(_1), (v4i64)(_2), (_3))) + +#define __lasx_xbnz_b(/*__m256i*/ _1) ((int)__builtin_lasx_xbnz_b((v32u8)(_1))) + +#define __lasx_xbnz_d(/*__m256i*/ _1) ((int)__builtin_lasx_xbnz_d((v4u64)(_1))) + +#define __lasx_xbnz_h(/*__m256i*/ _1) ((int)__builtin_lasx_xbnz_h((v16u16)(_1))) + +#define __lasx_xbnz_v(/*__m256i*/ _1) ((int)__builtin_lasx_xbnz_v((v32u8)(_1))) + +#define __lasx_xbnz_w(/*__m256i*/ _1) ((int)__builtin_lasx_xbnz_w((v8u32)(_1))) + +#define __lasx_xbz_b(/*__m256i*/ _1) ((int)__builtin_lasx_xbz_b((v32u8)(_1))) + +#define __lasx_xbz_d(/*__m256i*/ _1) ((int)__builtin_lasx_xbz_d((v4u64)(_1))) + +#define __lasx_xbz_h(/*__m256i*/ _1) ((int)__builtin_lasx_xbz_h((v16u16)(_1))) + +#define __lasx_xbz_v(/*__m256i*/ _1) ((int)__builtin_lasx_xbz_v((v32u8)(_1))) + +#define __lasx_xbz_w(/*__m256i*/ _1) ((int)__builtin_lasx_xbz_w((v8u32)(_1))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_caf_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_caf_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_caf_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_caf_s((v8f32)_1, (v8f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_ceq_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_ceq_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_ceq_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_ceq_s((v8f32)_1, (v8f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_cle_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_cle_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_cle_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_cle_s((v8f32)_1, (v8f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_clt_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_clt_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_clt_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_clt_s((v8f32)_1, (v8f32)_2); +} + 
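+/* Illustrative sketch, not part of the generated header: the xvfcmp_c*
+ * intrinsics are quiet comparisons, while the xvfcmp_s* variants further
+ * below are their signaling counterparts, which also raise the
+ * invalid-operation exception for quiet NaN operands.  Each lane of the
+ * result is all ones when the predicate holds and all zeros otherwise,
+ * so it can serve directly as a select mask, e.g. (assuming the
+ * __lasx_xvbitsel_v intrinsic defined earlier in this header):
+ *
+ *   __m256d min_clt(__m256d a, __m256d b) {
+ *     __m256i m = __lasx_xvfcmp_clt_d(a, b);           // lanes: a < b
+ *     return (__m256d)__lasx_xvbitsel_v((__m256i)b, (__m256i)a, m);
+ *   }
+ */
+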
+extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_cne_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_cne_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_cne_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_cne_s((v8f32)_1, (v8f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_cor_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_cor_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_cor_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_cor_s((v8f32)_1, (v8f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_cueq_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_cueq_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_cueq_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_cueq_s((v8f32)_1, (v8f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_cule_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_cule_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_cule_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_cule_s((v8f32)_1, (v8f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_cult_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_cult_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_cult_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_cult_s((v8f32)_1, (v8f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_cun_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_cun_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_cune_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_cune_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_cune_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_cune_s((v8f32)_1, (v8f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_cun_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_cun_s((v8f32)_1, (v8f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_saf_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_saf_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_saf_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_saf_s((v8f32)_1, (v8f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + 
__lasx_xvfcmp_seq_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_seq_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_seq_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_seq_s((v8f32)_1, (v8f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_sle_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_sle_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_sle_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_sle_s((v8f32)_1, (v8f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_slt_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_slt_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_slt_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_slt_s((v8f32)_1, (v8f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_sne_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_sne_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_sne_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_sne_s((v8f32)_1, (v8f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_sor_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_sor_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_sor_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_sor_s((v8f32)_1, (v8f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_sueq_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_sueq_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_sueq_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_sueq_s((v8f32)_1, (v8f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_sule_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_sule_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_sule_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_sule_s((v8f32)_1, (v8f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_sult_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_sult_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_sult_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_sult_s((v8f32)_1, (v8f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_sun_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_sun_d((v4f64)_1, 
(v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_sune_d(__m256d _1, __m256d _2) { + return (__m256i)__builtin_lasx_xvfcmp_sune_d((v4f64)_1, (v4f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_sune_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_sune_s((v8f32)_1, (v8f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i + __lasx_xvfcmp_sun_s(__m256 _1, __m256 _2) { + return (__m256i)__builtin_lasx_xvfcmp_sun_s((v8f32)_1, (v8f32)_2); +} + +#define __lasx_xvpickve_d_f(/*__m256d*/ _1, /*ui2*/ _2) \ + ((__m256d)__builtin_lasx_xvpickve_d_f((v4f64)(_1), (_2))) + +#define __lasx_xvpickve_w_f(/*__m256*/ _1, /*ui3*/ _2) \ + ((__m256)__builtin_lasx_xvpickve_w_f((v8f32)(_1), (_2))) + +#define __lasx_xvrepli_b(/*si10*/ _1) ((__m256i)__builtin_lasx_xvrepli_b((_1))) + +#define __lasx_xvrepli_d(/*si10*/ _1) ((__m256i)__builtin_lasx_xvrepli_d((_1))) + +#define __lasx_xvrepli_h(/*si10*/ _1) ((__m256i)__builtin_lasx_xvrepli_h((_1))) + +#define __lasx_xvrepli_w(/*si10*/ _1) ((__m256i)__builtin_lasx_xvrepli_w((_1))) + +#endif /* defined(__loongarch_asx). */ +#endif /* _LOONGSON_ASXINTRIN_H. */ diff --git a/clang/lib/Headers/lsxintrin.h b/clang/lib/Headers/lsxintrin.h new file mode 100644 index 0000000000000000000000000000000000000000..a29bc7757ab5680e733561da9700716512885f71 --- /dev/null +++ b/clang/lib/Headers/lsxintrin.h @@ -0,0 +1,3726 @@ +/*===------------- lsxintrin.h - LoongArch LSX intrinsics ------------------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef _LOONGSON_SXINTRIN_H +#define _LOONGSON_SXINTRIN_H 1 + +#if defined(__loongarch_sx) +typedef signed char v16i8 __attribute__((vector_size(16), aligned(16))); +typedef signed char v16i8_b __attribute__((vector_size(16), aligned(1))); +typedef unsigned char v16u8 __attribute__((vector_size(16), aligned(16))); +typedef unsigned char v16u8_b __attribute__((vector_size(16), aligned(1))); +typedef short v8i16 __attribute__((vector_size(16), aligned(16))); +typedef short v8i16_h __attribute__((vector_size(16), aligned(2))); +typedef unsigned short v8u16 __attribute__((vector_size(16), aligned(16))); +typedef unsigned short v8u16_h __attribute__((vector_size(16), aligned(2))); +typedef int v4i32 __attribute__((vector_size(16), aligned(16))); +typedef int v4i32_w __attribute__((vector_size(16), aligned(4))); +typedef unsigned int v4u32 __attribute__((vector_size(16), aligned(16))); +typedef unsigned int v4u32_w __attribute__((vector_size(16), aligned(4))); +typedef long long v2i64 __attribute__((vector_size(16), aligned(16))); +typedef long long v2i64_d __attribute__((vector_size(16), aligned(8))); +typedef unsigned long long v2u64 __attribute__((vector_size(16), aligned(16))); +typedef unsigned long long v2u64_d __attribute__((vector_size(16), aligned(8))); +typedef float v4f32 __attribute__((vector_size(16), aligned(16))); +typedef float v4f32_w __attribute__((vector_size(16), aligned(4))); +typedef double v2f64 __attribute__((vector_size(16), aligned(16))); +typedef double v2f64_d __attribute__((vector_size(16), aligned(8))); + +typedef long long __m128i __attribute__((__vector_size__(16), __may_alias__)); +typedef float __m128 __attribute__((__vector_size__(16), __may_alias__)); +typedef double __m128d __attribute__((__vector_size__(16), __may_alias__)); + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsll_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsll_b((v16i8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsll_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsll_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsll_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsll_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsll_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsll_d((v2i64)_1, (v2i64)_2); +} + +#define __lsx_vslli_b(/*__m128i*/ _1, /*ui3*/ _2) \ + ((__m128i)__builtin_lsx_vslli_b((v16i8)(_1), (_2))) + +#define __lsx_vslli_h(/*__m128i*/ _1, /*ui4*/ _2) \ + ((__m128i)__builtin_lsx_vslli_h((v8i16)(_1), (_2))) + +#define __lsx_vslli_w(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vslli_w((v4i32)(_1), (_2))) + +#define __lsx_vslli_d(/*__m128i*/ _1, /*ui6*/ _2) \ + ((__m128i)__builtin_lsx_vslli_d((v2i64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsra_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsra_b((v16i8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsra_h(__m128i _1, __m128i _2) { + return 
(__m128i)__builtin_lsx_vsra_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsra_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsra_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsra_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsra_d((v2i64)_1, (v2i64)_2); +} + +#define __lsx_vsrai_b(/*__m128i*/ _1, /*ui3*/ _2) \ + ((__m128i)__builtin_lsx_vsrai_b((v16i8)(_1), (_2))) + +#define __lsx_vsrai_h(/*__m128i*/ _1, /*ui4*/ _2) \ + ((__m128i)__builtin_lsx_vsrai_h((v8i16)(_1), (_2))) + +#define __lsx_vsrai_w(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vsrai_w((v4i32)(_1), (_2))) + +#define __lsx_vsrai_d(/*__m128i*/ _1, /*ui6*/ _2) \ + ((__m128i)__builtin_lsx_vsrai_d((v2i64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsrar_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsrar_b((v16i8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsrar_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsrar_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsrar_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsrar_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsrar_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsrar_d((v2i64)_1, (v2i64)_2); +} + +#define __lsx_vsrari_b(/*__m128i*/ _1, /*ui3*/ _2) \ + ((__m128i)__builtin_lsx_vsrari_b((v16i8)(_1), (_2))) + +#define __lsx_vsrari_h(/*__m128i*/ _1, /*ui4*/ _2) \ + ((__m128i)__builtin_lsx_vsrari_h((v8i16)(_1), (_2))) + +#define __lsx_vsrari_w(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vsrari_w((v4i32)(_1), (_2))) + +#define __lsx_vsrari_d(/*__m128i*/ _1, /*ui6*/ _2) \ + ((__m128i)__builtin_lsx_vsrari_d((v2i64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsrl_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsrl_b((v16i8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsrl_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsrl_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsrl_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsrl_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsrl_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsrl_d((v2i64)_1, (v2i64)_2); +} + +#define __lsx_vsrli_b(/*__m128i*/ _1, /*ui3*/ _2) \ + ((__m128i)__builtin_lsx_vsrli_b((v16i8)(_1), (_2))) + +#define __lsx_vsrli_h(/*__m128i*/ _1, /*ui4*/ _2) \ + ((__m128i)__builtin_lsx_vsrli_h((v8i16)(_1), (_2))) + +#define __lsx_vsrli_w(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vsrli_w((v4i32)(_1), (_2))) + +#define __lsx_vsrli_d(/*__m128i*/ _1, /*ui6*/ _2) \ + ((__m128i)__builtin_lsx_vsrli_d((v2i64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsrlr_b(__m128i _1, __m128i _2) { + return 
(__m128i)__builtin_lsx_vsrlr_b((v16i8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsrlr_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsrlr_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsrlr_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsrlr_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsrlr_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsrlr_d((v2i64)_1, (v2i64)_2); +} + +#define __lsx_vsrlri_b(/*__m128i*/ _1, /*ui3*/ _2) \ + ((__m128i)__builtin_lsx_vsrlri_b((v16i8)(_1), (_2))) + +#define __lsx_vsrlri_h(/*__m128i*/ _1, /*ui4*/ _2) \ + ((__m128i)__builtin_lsx_vsrlri_h((v8i16)(_1), (_2))) + +#define __lsx_vsrlri_w(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vsrlri_w((v4i32)(_1), (_2))) + +#define __lsx_vsrlri_d(/*__m128i*/ _1, /*ui6*/ _2) \ + ((__m128i)__builtin_lsx_vsrlri_d((v2i64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vbitclr_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vbitclr_b((v16u8)_1, (v16u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vbitclr_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vbitclr_h((v8u16)_1, (v8u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vbitclr_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vbitclr_w((v4u32)_1, (v4u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vbitclr_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vbitclr_d((v2u64)_1, (v2u64)_2); +} + +#define __lsx_vbitclri_b(/*__m128i*/ _1, /*ui3*/ _2) \ + ((__m128i)__builtin_lsx_vbitclri_b((v16u8)(_1), (_2))) + +#define __lsx_vbitclri_h(/*__m128i*/ _1, /*ui4*/ _2) \ + ((__m128i)__builtin_lsx_vbitclri_h((v8u16)(_1), (_2))) + +#define __lsx_vbitclri_w(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vbitclri_w((v4u32)(_1), (_2))) + +#define __lsx_vbitclri_d(/*__m128i*/ _1, /*ui6*/ _2) \ + ((__m128i)__builtin_lsx_vbitclri_d((v2u64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vbitset_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vbitset_b((v16u8)_1, (v16u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vbitset_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vbitset_h((v8u16)_1, (v8u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vbitset_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vbitset_w((v4u32)_1, (v4u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vbitset_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vbitset_d((v2u64)_1, (v2u64)_2); +} + +#define __lsx_vbitseti_b(/*__m128i*/ _1, /*ui3*/ _2) \ + ((__m128i)__builtin_lsx_vbitseti_b((v16u8)(_1), (_2))) + +#define __lsx_vbitseti_h(/*__m128i*/ _1, /*ui4*/ _2) \ + ((__m128i)__builtin_lsx_vbitseti_h((v8u16)(_1), (_2))) + +#define __lsx_vbitseti_w(/*__m128i*/ _1, /*ui5*/ _2) \ + 
((__m128i)__builtin_lsx_vbitseti_w((v4u32)(_1), (_2))) + +#define __lsx_vbitseti_d(/*__m128i*/ _1, /*ui6*/ _2) \ + ((__m128i)__builtin_lsx_vbitseti_d((v2u64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vbitrev_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vbitrev_b((v16u8)_1, (v16u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vbitrev_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vbitrev_h((v8u16)_1, (v8u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vbitrev_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vbitrev_w((v4u32)_1, (v4u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vbitrev_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vbitrev_d((v2u64)_1, (v2u64)_2); +} + +#define __lsx_vbitrevi_b(/*__m128i*/ _1, /*ui3*/ _2) \ + ((__m128i)__builtin_lsx_vbitrevi_b((v16u8)(_1), (_2))) + +#define __lsx_vbitrevi_h(/*__m128i*/ _1, /*ui4*/ _2) \ + ((__m128i)__builtin_lsx_vbitrevi_h((v8u16)(_1), (_2))) + +#define __lsx_vbitrevi_w(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vbitrevi_w((v4u32)(_1), (_2))) + +#define __lsx_vbitrevi_d(/*__m128i*/ _1, /*ui6*/ _2) \ + ((__m128i)__builtin_lsx_vbitrevi_d((v2u64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vadd_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vadd_b((v16i8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vadd_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vadd_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vadd_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vadd_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vadd_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vadd_d((v2i64)_1, (v2i64)_2); +} + +#define __lsx_vaddi_bu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vaddi_bu((v16i8)(_1), (_2))) + +#define __lsx_vaddi_hu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vaddi_hu((v8i16)(_1), (_2))) + +#define __lsx_vaddi_wu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vaddi_wu((v4i32)(_1), (_2))) + +#define __lsx_vaddi_du(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vaddi_du((v2i64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsub_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsub_b((v16i8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsub_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsub_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsub_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsub_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsub_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsub_d((v2i64)_1, (v2i64)_2); +} + +#define 
__lsx_vsubi_bu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vsubi_bu((v16i8)(_1), (_2))) + +#define __lsx_vsubi_hu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vsubi_hu((v8i16)(_1), (_2))) + +#define __lsx_vsubi_wu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vsubi_wu((v4i32)(_1), (_2))) + +#define __lsx_vsubi_du(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vsubi_du((v2i64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmax_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmax_b((v16i8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmax_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmax_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmax_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmax_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmax_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmax_d((v2i64)_1, (v2i64)_2); +} + +#define __lsx_vmaxi_b(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vmaxi_b((v16i8)(_1), (_2))) + +#define __lsx_vmaxi_h(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vmaxi_h((v8i16)(_1), (_2))) + +#define __lsx_vmaxi_w(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vmaxi_w((v4i32)(_1), (_2))) + +#define __lsx_vmaxi_d(/*__m128i*/ _1, /*si5*/ _2) \ + ((__m128i)__builtin_lsx_vmaxi_d((v2i64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmax_bu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmax_bu((v16u8)_1, (v16u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmax_hu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmax_hu((v8u16)_1, (v8u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmax_wu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmax_wu((v4u32)_1, (v4u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmax_du(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmax_du((v2u64)_1, (v2u64)_2); +} + +#define __lsx_vmaxi_bu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vmaxi_bu((v16u8)(_1), (_2))) + +#define __lsx_vmaxi_hu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vmaxi_hu((v8u16)(_1), (_2))) + +#define __lsx_vmaxi_wu(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vmaxi_wu((v4u32)(_1), (_2))) + +#define __lsx_vmaxi_du(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vmaxi_du((v2u64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmin_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmin_b((v16i8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmin_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmin_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmin_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmin_w((v4i32)_1, (v4i32)_2); +} + +extern 
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmin_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmin_d((v2i64)_1, (v2i64)_2);
+}
+
+#define __lsx_vmini_b(/*__m128i*/ _1, /*si5*/ _2) \
+  ((__m128i)__builtin_lsx_vmini_b((v16i8)(_1), (_2)))
+
+#define __lsx_vmini_h(/*__m128i*/ _1, /*si5*/ _2) \
+  ((__m128i)__builtin_lsx_vmini_h((v8i16)(_1), (_2)))
+
+#define __lsx_vmini_w(/*__m128i*/ _1, /*si5*/ _2) \
+  ((__m128i)__builtin_lsx_vmini_w((v4i32)(_1), (_2)))
+
+#define __lsx_vmini_d(/*__m128i*/ _1, /*si5*/ _2) \
+  ((__m128i)__builtin_lsx_vmini_d((v2i64)(_1), (_2)))
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmin_bu(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmin_bu((v16u8)_1, (v16u8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmin_hu(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmin_hu((v8u16)_1, (v8u16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmin_wu(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmin_wu((v4u32)_1, (v4u32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmin_du(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmin_du((v2u64)_1, (v2u64)_2);
+}
+
+#define __lsx_vmini_bu(/*__m128i*/ _1, /*ui5*/ _2) \
+  ((__m128i)__builtin_lsx_vmini_bu((v16u8)(_1), (_2)))
+
+#define __lsx_vmini_hu(/*__m128i*/ _1, /*ui5*/ _2) \
+  ((__m128i)__builtin_lsx_vmini_hu((v8u16)(_1), (_2)))
+
+#define __lsx_vmini_wu(/*__m128i*/ _1, /*ui5*/ _2) \
+  ((__m128i)__builtin_lsx_vmini_wu((v4u32)(_1), (_2)))
+
+#define __lsx_vmini_du(/*__m128i*/ _1, /*ui5*/ _2) \
+  ((__m128i)__builtin_lsx_vmini_du((v2u64)(_1), (_2)))
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vseq_b(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vseq_b((v16i8)_1, (v16i8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vseq_h(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vseq_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vseq_w(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vseq_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vseq_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vseq_d((v2i64)_1, (v2i64)_2);
+}
+
+#define __lsx_vseqi_b(/*__m128i*/ _1, /*si5*/ _2) \
+  ((__m128i)__builtin_lsx_vseqi_b((v16i8)(_1), (_2)))
+
+#define __lsx_vseqi_h(/*__m128i*/ _1, /*si5*/ _2) \
+  ((__m128i)__builtin_lsx_vseqi_h((v8i16)(_1), (_2)))
+
+#define __lsx_vseqi_w(/*__m128i*/ _1, /*si5*/ _2) \
+  ((__m128i)__builtin_lsx_vseqi_w((v4i32)(_1), (_2)))
+
+#define __lsx_vseqi_d(/*__m128i*/ _1, /*si5*/ _2) \
+  ((__m128i)__builtin_lsx_vseqi_d((v2i64)(_1), (_2)))
+
+#define __lsx_vslti_b(/*__m128i*/ _1, /*si5*/ _2) \
+  ((__m128i)__builtin_lsx_vslti_b((v16i8)(_1), (_2)))
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vslt_b(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vslt_b((v16i8)_1, (v16i8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vslt_h(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vslt_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vslt_w(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vslt_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vslt_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vslt_d((v2i64)_1, (v2i64)_2);
+}
+
+#define __lsx_vslti_h(/*__m128i*/ _1, /*si5*/ _2) \
+  ((__m128i)__builtin_lsx_vslti_h((v8i16)(_1), (_2)))
+
+#define __lsx_vslti_w(/*__m128i*/ _1, /*si5*/ _2) \
+  ((__m128i)__builtin_lsx_vslti_w((v4i32)(_1), (_2)))
+
+#define __lsx_vslti_d(/*__m128i*/ _1, /*si5*/ _2) \
+  ((__m128i)__builtin_lsx_vslti_d((v2i64)(_1), (_2)))
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vslt_bu(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vslt_bu((v16u8)_1, (v16u8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vslt_hu(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vslt_hu((v8u16)_1, (v8u16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vslt_wu(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vslt_wu((v4u32)_1, (v4u32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vslt_du(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vslt_du((v2u64)_1, (v2u64)_2);
+}
+
+#define __lsx_vslti_bu(/*__m128i*/ _1, /*ui5*/ _2) \
+  ((__m128i)__builtin_lsx_vslti_bu((v16u8)(_1), (_2)))
+
+#define __lsx_vslti_hu(/*__m128i*/ _1, /*ui5*/ _2) \
+  ((__m128i)__builtin_lsx_vslti_hu((v8u16)(_1), (_2)))
+
+#define __lsx_vslti_wu(/*__m128i*/ _1, /*ui5*/ _2) \
+  ((__m128i)__builtin_lsx_vslti_wu((v4u32)(_1), (_2)))
+
+#define __lsx_vslti_du(/*__m128i*/ _1, /*ui5*/ _2) \
+  ((__m128i)__builtin_lsx_vslti_du((v2u64)(_1), (_2)))
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsle_b(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsle_b((v16i8)_1, (v16i8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsle_h(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsle_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsle_w(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsle_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsle_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsle_d((v2i64)_1, (v2i64)_2);
+}
+
+#define __lsx_vslei_b(/*__m128i*/ _1, /*si5*/ _2) \
+  ((__m128i)__builtin_lsx_vslei_b((v16i8)(_1), (_2)))
+
+#define __lsx_vslei_h(/*__m128i*/ _1, /*si5*/ _2) \
+  ((__m128i)__builtin_lsx_vslei_h((v8i16)(_1), (_2)))
+
+#define __lsx_vslei_w(/*__m128i*/ _1, /*si5*/ _2) \
+  ((__m128i)__builtin_lsx_vslei_w((v4i32)(_1), (_2)))
+
+#define __lsx_vslei_d(/*__m128i*/ _1, /*si5*/ _2) \
+  ((__m128i)__builtin_lsx_vslei_d((v2i64)(_1), (_2)))
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsle_bu(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsle_bu((v16u8)_1, (v16u8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsle_hu(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsle_hu((v8u16)_1, (v8u16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsle_wu(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsle_wu((v4u32)_1, (v4u32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsle_du(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsle_du((v2u64)_1, (v2u64)_2);
+}
+
+#define __lsx_vslei_bu(/*__m128i*/ _1, /*ui5*/ _2) \
+  ((__m128i)__builtin_lsx_vslei_bu((v16u8)(_1), (_2)))
+
+#define __lsx_vslei_hu(/*__m128i*/ _1, /*ui5*/ _2) \
+  ((__m128i)__builtin_lsx_vslei_hu((v8u16)(_1), (_2)))
+
+#define __lsx_vslei_wu(/*__m128i*/ _1, /*ui5*/ _2) \
+  ((__m128i)__builtin_lsx_vslei_wu((v4u32)(_1), (_2)))
+
+#define __lsx_vslei_du(/*__m128i*/ _1, /*ui5*/ _2) \
+  ((__m128i)__builtin_lsx_vslei_du((v2u64)(_1), (_2)))
+
+#define __lsx_vsat_b(/*__m128i*/ _1, /*ui3*/ _2) \
+  ((__m128i)__builtin_lsx_vsat_b((v16i8)(_1), (_2)))
+
+#define __lsx_vsat_h(/*__m128i*/ _1, /*ui4*/ _2) \
+  ((__m128i)__builtin_lsx_vsat_h((v8i16)(_1), (_2)))
+
+#define __lsx_vsat_w(/*__m128i*/ _1, /*ui5*/ _2) \
+  ((__m128i)__builtin_lsx_vsat_w((v4i32)(_1), (_2)))
+
+#define __lsx_vsat_d(/*__m128i*/ _1, /*ui6*/ _2) \
+  ((__m128i)__builtin_lsx_vsat_d((v2i64)(_1), (_2)))
+
+#define __lsx_vsat_bu(/*__m128i*/ _1, /*ui3*/ _2) \
+  ((__m128i)__builtin_lsx_vsat_bu((v16u8)(_1), (_2)))
+
+#define __lsx_vsat_hu(/*__m128i*/ _1, /*ui4*/ _2) \
+  ((__m128i)__builtin_lsx_vsat_hu((v8u16)(_1), (_2)))
+
+#define __lsx_vsat_wu(/*__m128i*/ _1, /*ui5*/ _2) \
+  ((__m128i)__builtin_lsx_vsat_wu((v4u32)(_1), (_2)))
+
+#define __lsx_vsat_du(/*__m128i*/ _1, /*ui6*/ _2) \
+  ((__m128i)__builtin_lsx_vsat_du((v2u64)(_1), (_2)))
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vadda_b(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vadda_b((v16i8)_1, (v16i8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vadda_h(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vadda_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vadda_w(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vadda_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vadda_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vadda_d((v2i64)_1, (v2i64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsadd_b(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsadd_b((v16i8)_1, (v16i8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsadd_h(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsadd_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsadd_w(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsadd_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsadd_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsadd_d((v2i64)_1, (v2i64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsadd_bu(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsadd_bu((v16u8)_1, (v16u8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsadd_hu(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsadd_hu((v8u16)_1, (v8u16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsadd_wu(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsadd_wu((v4u32)_1, (v4u32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsadd_du(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsadd_du((v2u64)_1, (v2u64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vavg_b(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vavg_b((v16i8)_1, (v16i8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vavg_h(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vavg_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vavg_w(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vavg_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vavg_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vavg_d((v2i64)_1, (v2i64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vavg_bu(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vavg_bu((v16u8)_1, (v16u8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vavg_hu(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vavg_hu((v8u16)_1, (v8u16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vavg_wu(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vavg_wu((v4u32)_1, (v4u32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vavg_du(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vavg_du((v2u64)_1, (v2u64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vavgr_b(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vavgr_b((v16i8)_1, (v16i8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vavgr_h(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vavgr_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vavgr_w(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vavgr_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vavgr_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vavgr_d((v2i64)_1, (v2i64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vavgr_bu(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vavgr_bu((v16u8)_1, (v16u8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vavgr_hu(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vavgr_hu((v8u16)_1, (v8u16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vavgr_wu(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vavgr_wu((v4u32)_1, (v4u32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vavgr_du(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vavgr_du((v2u64)_1, (v2u64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vssub_b(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vssub_b((v16i8)_1, (v16i8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vssub_h(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vssub_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vssub_w(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vssub_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vssub_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vssub_d((v2i64)_1, (v2i64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vssub_bu(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vssub_bu((v16u8)_1, (v16u8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vssub_hu(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vssub_hu((v8u16)_1, (v8u16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vssub_wu(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vssub_wu((v4u32)_1, (v4u32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vssub_du(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vssub_du((v2u64)_1, (v2u64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vabsd_b(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vabsd_b((v16i8)_1, (v16i8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vabsd_h(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vabsd_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vabsd_w(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vabsd_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vabsd_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vabsd_d((v2i64)_1, (v2i64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vabsd_bu(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vabsd_bu((v16u8)_1, (v16u8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vabsd_hu(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vabsd_hu((v8u16)_1, (v8u16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vabsd_wu(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vabsd_wu((v4u32)_1, (v4u32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vabsd_du(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vabsd_du((v2u64)_1, (v2u64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmul_b(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmul_b((v16i8)_1, (v16i8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmul_h(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmul_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmul_w(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmul_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmul_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmul_d((v2i64)_1, (v2i64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmadd_b(__m128i _1, __m128i _2, __m128i _3) {
+  return (__m128i)__builtin_lsx_vmadd_b((v16i8)_1, (v16i8)_2, (v16i8)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmadd_h(__m128i _1, __m128i _2, __m128i _3) {
+  return (__m128i)__builtin_lsx_vmadd_h((v8i16)_1, (v8i16)_2, (v8i16)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmadd_w(__m128i _1, __m128i _2, __m128i _3) {
+  return (__m128i)__builtin_lsx_vmadd_w((v4i32)_1, (v4i32)_2, (v4i32)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmadd_d(__m128i _1, __m128i _2, __m128i _3) {
+  return (__m128i)__builtin_lsx_vmadd_d((v2i64)_1, (v2i64)_2, (v2i64)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmsub_b(__m128i _1, __m128i _2, __m128i _3) {
+  return (__m128i)__builtin_lsx_vmsub_b((v16i8)_1, (v16i8)_2, (v16i8)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmsub_h(__m128i _1, __m128i _2, __m128i _3) {
+  return (__m128i)__builtin_lsx_vmsub_h((v8i16)_1, (v8i16)_2, (v8i16)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmsub_w(__m128i _1, __m128i _2, __m128i _3) {
+  return (__m128i)__builtin_lsx_vmsub_w((v4i32)_1, (v4i32)_2, (v4i32)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmsub_d(__m128i _1, __m128i _2, __m128i _3) {
+  return (__m128i)__builtin_lsx_vmsub_d((v2i64)_1, (v2i64)_2, (v2i64)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vdiv_b(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vdiv_b((v16i8)_1, (v16i8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vdiv_h(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vdiv_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vdiv_w(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vdiv_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vdiv_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vdiv_d((v2i64)_1, (v2i64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vdiv_bu(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vdiv_bu((v16u8)_1, (v16u8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vdiv_hu(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vdiv_hu((v8u16)_1, (v8u16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vdiv_wu(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vdiv_wu((v4u32)_1, (v4u32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vdiv_du(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vdiv_du((v2u64)_1, (v2u64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vhaddw_h_b(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vhaddw_h_b((v16i8)_1, (v16i8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vhaddw_w_h(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vhaddw_w_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vhaddw_d_w(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vhaddw_d_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vhaddw_hu_bu(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vhaddw_hu_bu((v16u8)_1, (v16u8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vhaddw_wu_hu(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vhaddw_wu_hu((v8u16)_1, (v8u16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vhaddw_du_wu(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vhaddw_du_wu((v4u32)_1, (v4u32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vhsubw_h_b(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vhsubw_h_b((v16i8)_1, (v16i8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vhsubw_w_h(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vhsubw_w_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vhsubw_d_w(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vhsubw_d_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vhsubw_hu_bu(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vhsubw_hu_bu((v16u8)_1, (v16u8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vhsubw_wu_hu(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vhsubw_wu_hu((v8u16)_1, (v8u16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vhsubw_du_wu(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vhsubw_du_wu((v4u32)_1, (v4u32)_2);
+}
+
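+/* Usage sketch (illustrative): the vhaddw/vhsubw wrappers above widen while
+   combining adjacent element pairs (odd-indexed elements of _1 with
+   even-indexed elements of _2, per the ISA's pairing), so __lsx_vhaddw_h_b
+   yields 16-bit sums from 8-bit inputs; _hu_bu and friends are the unsigned
+   forms. */
+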
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmod_b(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmod_b((v16i8)_1, (v16i8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmod_h(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmod_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmod_w(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmod_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmod_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmod_d((v2i64)_1, (v2i64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmod_bu(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmod_bu((v16u8)_1, (v16u8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmod_hu(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmod_hu((v8u16)_1, (v8u16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmod_wu(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmod_wu((v4u32)_1, (v4u32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmod_du(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmod_du((v2u64)_1, (v2u64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vreplve_b(__m128i _1, int _2) {
+  return (__m128i)__builtin_lsx_vreplve_b((v16i8)_1, (int)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vreplve_h(__m128i _1, int _2) {
+  return (__m128i)__builtin_lsx_vreplve_h((v8i16)_1, (int)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vreplve_w(__m128i _1, int _2) {
+  return (__m128i)__builtin_lsx_vreplve_w((v4i32)_1, (int)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vreplve_d(__m128i _1, int _2) {
+  return (__m128i)__builtin_lsx_vreplve_d((v2i64)_1, (int)_2);
+}
+
+#define __lsx_vreplvei_b(/*__m128i*/ _1, /*ui4*/ _2) \
+  ((__m128i)__builtin_lsx_vreplvei_b((v16i8)(_1), (_2)))
+
+#define __lsx_vreplvei_h(/*__m128i*/ _1, /*ui3*/ _2) \
+  ((__m128i)__builtin_lsx_vreplvei_h((v8i16)(_1), (_2)))
+
+#define __lsx_vreplvei_w(/*__m128i*/ _1, /*ui2*/ _2) \
+  ((__m128i)__builtin_lsx_vreplvei_w((v4i32)(_1), (_2)))
+
+#define __lsx_vreplvei_d(/*__m128i*/ _1, /*ui1*/ _2) \
+  ((__m128i)__builtin_lsx_vreplvei_d((v2i64)(_1), (_2)))
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vpickev_b(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vpickev_b((v16i8)_1, (v16i8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vpickev_h(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vpickev_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vpickev_w(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vpickev_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vpickev_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vpickev_d((v2i64)_1, (v2i64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vpickod_b(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vpickod_b((v16i8)_1, (v16i8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vpickod_h(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vpickod_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vpickod_w(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vpickod_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vpickod_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vpickod_d((v2i64)_1, (v2i64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vilvh_b(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vilvh_b((v16i8)_1, (v16i8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vilvh_h(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vilvh_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vilvh_w(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vilvh_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vilvh_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vilvh_d((v2i64)_1, (v2i64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vilvl_b(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vilvl_b((v16i8)_1, (v16i8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vilvl_h(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vilvl_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vilvl_w(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vilvl_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vilvl_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vilvl_d((v2i64)_1, (v2i64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vpackev_b(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vpackev_b((v16i8)_1, (v16i8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vpackev_h(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vpackev_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vpackev_w(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vpackev_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vpackev_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vpackev_d((v2i64)_1, (v2i64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vpackod_b(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vpackod_b((v16i8)_1, (v16i8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vpackod_h(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vpackod_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vpackod_w(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vpackod_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vpackod_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vpackod_d((v2i64)_1, (v2i64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vshuf_h(__m128i _1, __m128i _2, __m128i _3) {
+  return (__m128i)__builtin_lsx_vshuf_h((v8i16)_1, (v8i16)_2, (v8i16)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vshuf_w(__m128i _1, __m128i _2, __m128i _3) {
+  return (__m128i)__builtin_lsx_vshuf_w((v4i32)_1, (v4i32)_2, (v4i32)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vshuf_d(__m128i _1, __m128i _2, __m128i _3) {
+  return (__m128i)__builtin_lsx_vshuf_d((v2i64)_1, (v2i64)_2, (v2i64)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vand_v(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vand_v((v16u8)_1, (v16u8)_2);
+}
+
+#define __lsx_vandi_b(/*__m128i*/ _1, /*ui8*/ _2) \
+  ((__m128i)__builtin_lsx_vandi_b((v16u8)(_1), (_2)))
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vor_v(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vor_v((v16u8)_1, (v16u8)_2);
+}
+
+#define __lsx_vori_b(/*__m128i*/ _1, /*ui8*/ _2) \
+  ((__m128i)__builtin_lsx_vori_b((v16u8)(_1), (_2)))
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vnor_v(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vnor_v((v16u8)_1, (v16u8)_2);
+}
+
+#define __lsx_vnori_b(/*__m128i*/ _1, /*ui8*/ _2) \
+  ((__m128i)__builtin_lsx_vnori_b((v16u8)(_1), (_2)))
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vxor_v(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vxor_v((v16u8)_1, (v16u8)_2);
+}
+
+#define __lsx_vxori_b(/*__m128i*/ _1, /*ui8*/ _2) \
+  ((__m128i)__builtin_lsx_vxori_b((v16u8)(_1), (_2)))
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vbitsel_v(__m128i _1, __m128i _2, __m128i _3) {
+  return (__m128i)__builtin_lsx_vbitsel_v((v16u8)_1, (v16u8)_2, (v16u8)_3);
+}
+
+#define __lsx_vbitseli_b(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \
+  ((__m128i)__builtin_lsx_vbitseli_b((v16u8)(_1), (v16u8)(_2), (_3)))
+
+#define __lsx_vshuf4i_b(/*__m128i*/ _1, /*ui8*/ _2) \
+  ((__m128i)__builtin_lsx_vshuf4i_b((v16i8)(_1), (_2)))
+
+#define __lsx_vshuf4i_h(/*__m128i*/ _1, /*ui8*/ _2) \
+  ((__m128i)__builtin_lsx_vshuf4i_h((v8i16)(_1), (_2)))
+
+#define __lsx_vshuf4i_w(/*__m128i*/ _1, /*ui8*/ _2) \
+  ((__m128i)__builtin_lsx_vshuf4i_w((v4i32)(_1), (_2)))
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vreplgr2vr_b(int _1) {
+  return (__m128i)__builtin_lsx_vreplgr2vr_b((int)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vreplgr2vr_h(int _1) {
+  return (__m128i)__builtin_lsx_vreplgr2vr_h((int)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vreplgr2vr_w(int _1) {
+  return (__m128i)__builtin_lsx_vreplgr2vr_w((int)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vreplgr2vr_d(long int _1) {
+  return (__m128i)__builtin_lsx_vreplgr2vr_d((long int)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vpcnt_b(__m128i _1) {
+  return (__m128i)__builtin_lsx_vpcnt_b((v16i8)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vpcnt_h(__m128i _1) {
+  return (__m128i)__builtin_lsx_vpcnt_h((v8i16)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vpcnt_w(__m128i _1) {
+  return (__m128i)__builtin_lsx_vpcnt_w((v4i32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vpcnt_d(__m128i _1) {
+  return (__m128i)__builtin_lsx_vpcnt_d((v2i64)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vclo_b(__m128i _1) {
+  return (__m128i)__builtin_lsx_vclo_b((v16i8)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vclo_h(__m128i _1) {
+  return (__m128i)__builtin_lsx_vclo_h((v8i16)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vclo_w(__m128i _1) {
+  return (__m128i)__builtin_lsx_vclo_w((v4i32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vclo_d(__m128i _1) {
+  return (__m128i)__builtin_lsx_vclo_d((v2i64)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vclz_b(__m128i _1) {
+  return (__m128i)__builtin_lsx_vclz_b((v16i8)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vclz_h(__m128i _1) {
+  return (__m128i)__builtin_lsx_vclz_h((v8i16)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vclz_w(__m128i _1) {
+  return (__m128i)__builtin_lsx_vclz_w((v4i32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vclz_d(__m128i _1) {
+  return (__m128i)__builtin_lsx_vclz_d((v2i64)_1);
+}
+
+#define __lsx_vpickve2gr_b(/*__m128i*/ _1, /*ui4*/ _2) \
+  ((int)__builtin_lsx_vpickve2gr_b((v16i8)(_1), (_2)))
+
+#define __lsx_vpickve2gr_h(/*__m128i*/ _1, /*ui3*/ _2) \
+  ((int)__builtin_lsx_vpickve2gr_h((v8i16)(_1), (_2)))
+
+#define __lsx_vpickve2gr_w(/*__m128i*/ _1, /*ui2*/ _2) \
+  ((int)__builtin_lsx_vpickve2gr_w((v4i32)(_1), (_2)))
+
+#define __lsx_vpickve2gr_d(/*__m128i*/ _1, /*ui1*/ _2) \
+  ((long int)__builtin_lsx_vpickve2gr_d((v2i64)(_1), (_2)))
+
+#define __lsx_vpickve2gr_bu(/*__m128i*/ _1, /*ui4*/ _2) \
+  ((unsigned int)__builtin_lsx_vpickve2gr_bu((v16i8)(_1), (_2)))
+
+#define __lsx_vpickve2gr_hu(/*__m128i*/ _1, /*ui3*/ _2) \
+  ((unsigned int)__builtin_lsx_vpickve2gr_hu((v8i16)(_1), (_2)))
+
+#define __lsx_vpickve2gr_wu(/*__m128i*/ _1, /*ui2*/ _2) \
+  ((unsigned int)__builtin_lsx_vpickve2gr_wu((v4i32)(_1), (_2)))
+
+#define __lsx_vpickve2gr_du(/*__m128i*/ _1, /*ui1*/ _2) \
+  ((unsigned long int)__builtin_lsx_vpickve2gr_du((v2i64)(_1), (_2)))
+
+#define __lsx_vinsgr2vr_b(/*__m128i*/ _1, /*int*/ _2, /*ui4*/ _3) \
+  ((__m128i)__builtin_lsx_vinsgr2vr_b((v16i8)(_1), (int)(_2), (_3)))
+
+#define __lsx_vinsgr2vr_h(/*__m128i*/ _1, /*int*/ _2, /*ui3*/ _3) \
+  ((__m128i)__builtin_lsx_vinsgr2vr_h((v8i16)(_1), (int)(_2), (_3)))
+
+#define __lsx_vinsgr2vr_w(/*__m128i*/ _1, /*int*/ _2, /*ui2*/ _3) \
+  ((__m128i)__builtin_lsx_vinsgr2vr_w((v4i32)(_1), (int)(_2), (_3)))
+
+#define __lsx_vinsgr2vr_d(/*__m128i*/ _1, /*long int*/ _2, /*ui1*/ _3) \
+  ((__m128i)__builtin_lsx_vinsgr2vr_d((v2i64)(_1), (long int)(_2), (_3)))
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128
+    __lsx_vfadd_s(__m128 _1, __m128 _2) {
+  return (__m128)__builtin_lsx_vfadd_s((v4f32)_1, (v4f32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d
+    __lsx_vfadd_d(__m128d _1, __m128d _2) {
+  return (__m128d)__builtin_lsx_vfadd_d((v2f64)_1, (v2f64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128
+    __lsx_vfsub_s(__m128 _1, __m128 _2) {
+  return (__m128)__builtin_lsx_vfsub_s((v4f32)_1, (v4f32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d
+    __lsx_vfsub_d(__m128d _1, __m128d _2) {
+  return (__m128d)__builtin_lsx_vfsub_d((v2f64)_1, (v2f64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128
+    __lsx_vfmul_s(__m128 _1, __m128 _2) {
+  return (__m128)__builtin_lsx_vfmul_s((v4f32)_1, (v4f32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d
+    __lsx_vfmul_d(__m128d _1, __m128d _2) {
+  return (__m128d)__builtin_lsx_vfmul_d((v2f64)_1, (v2f64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128
+    __lsx_vfdiv_s(__m128 _1, __m128 _2) {
+  return (__m128)__builtin_lsx_vfdiv_s((v4f32)_1, (v4f32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d
+    __lsx_vfdiv_d(__m128d _1, __m128d _2) {
+  return (__m128d)__builtin_lsx_vfdiv_d((v2f64)_1, (v2f64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vfcvt_h_s(__m128 _1, __m128 _2) {
+  return (__m128i)__builtin_lsx_vfcvt_h_s((v4f32)_1, (v4f32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128
+    __lsx_vfcvt_s_d(__m128d _1, __m128d _2) {
+  return (__m128)__builtin_lsx_vfcvt_s_d((v2f64)_1, (v2f64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128
+    __lsx_vfmin_s(__m128 _1, __m128 _2) {
+  return (__m128)__builtin_lsx_vfmin_s((v4f32)_1, (v4f32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d
+    __lsx_vfmin_d(__m128d _1, __m128d _2) {
+  return (__m128d)__builtin_lsx_vfmin_d((v2f64)_1, (v2f64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128
+    __lsx_vfmina_s(__m128 _1, __m128 _2) {
+  return (__m128)__builtin_lsx_vfmina_s((v4f32)_1, (v4f32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d
+    __lsx_vfmina_d(__m128d _1, __m128d _2) {
+  return (__m128d)__builtin_lsx_vfmina_d((v2f64)_1, (v2f64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128
+    __lsx_vfmax_s(__m128 _1, __m128 _2) {
+  return (__m128)__builtin_lsx_vfmax_s((v4f32)_1, (v4f32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d
+    __lsx_vfmax_d(__m128d _1, __m128d _2) {
+  return (__m128d)__builtin_lsx_vfmax_d((v2f64)_1, (v2f64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128
+    __lsx_vfmaxa_s(__m128 _1, __m128 _2) {
+  return (__m128)__builtin_lsx_vfmaxa_s((v4f32)_1, (v4f32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d
+    __lsx_vfmaxa_d(__m128d _1, __m128d _2) {
+  return (__m128d)__builtin_lsx_vfmaxa_d((v2f64)_1, (v2f64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vfclass_s(__m128 _1) {
+  return (__m128i)__builtin_lsx_vfclass_s((v4f32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vfclass_d(__m128d _1) {
+  return (__m128i)__builtin_lsx_vfclass_d((v2f64)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128
+    __lsx_vfsqrt_s(__m128 _1) {
+  return (__m128)__builtin_lsx_vfsqrt_s((v4f32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d
+    __lsx_vfsqrt_d(__m128d _1) {
+  return (__m128d)__builtin_lsx_vfsqrt_d((v2f64)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128
+    __lsx_vfrecip_s(__m128 _1) {
+  return (__m128)__builtin_lsx_vfrecip_s((v4f32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d
+    __lsx_vfrecip_d(__m128d _1) {
+  return (__m128d)__builtin_lsx_vfrecip_d((v2f64)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128
+    __lsx_vfrint_s(__m128 _1) {
+  return (__m128)__builtin_lsx_vfrint_s((v4f32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d
+    __lsx_vfrint_d(__m128d _1) {
+  return (__m128d)__builtin_lsx_vfrint_d((v2f64)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128
+    __lsx_vfrsqrt_s(__m128 _1) {
+  return (__m128)__builtin_lsx_vfrsqrt_s((v4f32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d
+    __lsx_vfrsqrt_d(__m128d _1) {
+  return (__m128d)__builtin_lsx_vfrsqrt_d((v2f64)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128
+    __lsx_vflogb_s(__m128 _1) {
+  return (__m128)__builtin_lsx_vflogb_s((v4f32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d
+    __lsx_vflogb_d(__m128d _1) {
+  return (__m128d)__builtin_lsx_vflogb_d((v2f64)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128
+    __lsx_vfcvth_s_h(__m128i _1) {
+  return (__m128)__builtin_lsx_vfcvth_s_h((v8i16)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d
+    __lsx_vfcvth_d_s(__m128 _1) {
+  return (__m128d)__builtin_lsx_vfcvth_d_s((v4f32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128
+    __lsx_vfcvtl_s_h(__m128i _1) {
+  return (__m128)__builtin_lsx_vfcvtl_s_h((v8i16)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d
+    __lsx_vfcvtl_d_s(__m128 _1) {
+  return (__m128d)__builtin_lsx_vfcvtl_d_s((v4f32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vftint_w_s(__m128 _1) {
+  return (__m128i)__builtin_lsx_vftint_w_s((v4f32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vftint_l_d(__m128d _1) {
+  return (__m128i)__builtin_lsx_vftint_l_d((v2f64)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vftint_wu_s(__m128 _1) {
+  return (__m128i)__builtin_lsx_vftint_wu_s((v4f32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vftint_lu_d(__m128d _1) {
+  return (__m128i)__builtin_lsx_vftint_lu_d((v2f64)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vftintrz_w_s(__m128 _1) {
+  return (__m128i)__builtin_lsx_vftintrz_w_s((v4f32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vftintrz_l_d(__m128d _1) {
+  return (__m128i)__builtin_lsx_vftintrz_l_d((v2f64)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vftintrz_wu_s(__m128 _1) {
+  return (__m128i)__builtin_lsx_vftintrz_wu_s((v4f32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vftintrz_lu_d(__m128d _1) {
+  return (__m128i)__builtin_lsx_vftintrz_lu_d((v2f64)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128
+    __lsx_vffint_s_w(__m128i _1) {
+  return (__m128)__builtin_lsx_vffint_s_w((v4i32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d
+    __lsx_vffint_d_l(__m128i _1) {
+  return (__m128d)__builtin_lsx_vffint_d_l((v2i64)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128
+    __lsx_vffint_s_wu(__m128i _1) {
+  return (__m128)__builtin_lsx_vffint_s_wu((v4u32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d
+    __lsx_vffint_d_lu(__m128i _1) {
+  return (__m128d)__builtin_lsx_vffint_d_lu((v2u64)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vandn_v(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vandn_v((v16u8)_1, (v16u8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vneg_b(__m128i _1) {
+  return (__m128i)__builtin_lsx_vneg_b((v16i8)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vneg_h(__m128i _1) {
+  return (__m128i)__builtin_lsx_vneg_h((v8i16)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vneg_w(__m128i _1) {
+  return (__m128i)__builtin_lsx_vneg_w((v4i32)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vneg_d(__m128i _1) {
+  return (__m128i)__builtin_lsx_vneg_d((v2i64)_1);
+}
+
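+/* Usage sketch (illustrative): vftint* converts float lanes to integers
+   (the rz/rp/rm/rne variants pick truncate, toward +inf, toward -inf, or
+   nearest-even rounding; the plain form follows the current FP rounding
+   mode), and vffint* converts back:
+
+     __m128i i = __lsx_vftintrz_w_s(f);  // truncate each float lane
+     __m128  g = __lsx_vffint_s_w(i);    // int lanes back to float
+*/
+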
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmuh_b(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmuh_b((v16i8)_1, (v16i8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmuh_h(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmuh_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmuh_w(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmuh_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmuh_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmuh_d((v2i64)_1, (v2i64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmuh_bu(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmuh_bu((v16u8)_1, (v16u8)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmuh_hu(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmuh_hu((v8u16)_1, (v8u16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmuh_wu(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmuh_wu((v4u32)_1, (v4u32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmuh_du(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vmuh_du((v2u64)_1, (v2u64)_2);
+}
+
+#define __lsx_vsllwil_h_b(/*__m128i*/ _1, /*ui3*/ _2) \
+  ((__m128i)__builtin_lsx_vsllwil_h_b((v16i8)(_1), (_2)))
+
+#define __lsx_vsllwil_w_h(/*__m128i*/ _1, /*ui4*/ _2) \
+  ((__m128i)__builtin_lsx_vsllwil_w_h((v8i16)(_1), (_2)))
+
+#define __lsx_vsllwil_d_w(/*__m128i*/ _1, /*ui5*/ _2) \
+  ((__m128i)__builtin_lsx_vsllwil_d_w((v4i32)(_1), (_2)))
+
+#define __lsx_vsllwil_hu_bu(/*__m128i*/ _1, /*ui3*/ _2) \
+  ((__m128i)__builtin_lsx_vsllwil_hu_bu((v16u8)(_1), (_2)))
+
+#define __lsx_vsllwil_wu_hu(/*__m128i*/ _1, /*ui4*/ _2) \
+  ((__m128i)__builtin_lsx_vsllwil_wu_hu((v8u16)(_1), (_2)))
+
+#define __lsx_vsllwil_du_wu(/*__m128i*/ _1, /*ui5*/ _2) \
+  ((__m128i)__builtin_lsx_vsllwil_du_wu((v4u32)(_1), (_2)))
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsran_b_h(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsran_b_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsran_h_w(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsran_h_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsran_w_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsran_w_d((v2i64)_1, (v2i64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vssran_b_h(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vssran_b_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vssran_h_w(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vssran_h_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vssran_w_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vssran_w_d((v2i64)_1, (v2i64)_2);
+}
+
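+/* Usage sketch (illustrative): the vsran/vssran families above shift each
+   wider element right by the per-element amounts in _2 and narrow it to the
+   next smaller type; the ss forms saturate the narrowed value instead of
+   simply truncating. */
+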
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vssran_bu_h(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vssran_bu_h((v8u16)_1, (v8u16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vssran_hu_w(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vssran_hu_w((v4u32)_1, (v4u32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vssran_wu_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vssran_wu_d((v2u64)_1, (v2u64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsrarn_b_h(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsrarn_b_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsrarn_h_w(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsrarn_h_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsrarn_w_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsrarn_w_d((v2i64)_1, (v2i64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vssrarn_b_h(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vssrarn_b_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vssrarn_h_w(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vssrarn_h_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vssrarn_w_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vssrarn_w_d((v2i64)_1, (v2i64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vssrarn_bu_h(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vssrarn_bu_h((v8u16)_1, (v8u16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vssrarn_hu_w(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vssrarn_hu_w((v4u32)_1, (v4u32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vssrarn_wu_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vssrarn_wu_d((v2u64)_1, (v2u64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsrln_b_h(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsrln_b_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsrln_h_w(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsrln_h_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsrln_w_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsrln_w_d((v2i64)_1, (v2i64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vssrln_bu_h(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vssrln_bu_h((v8u16)_1, (v8u16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vssrln_hu_w(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vssrln_hu_w((v4u32)_1, (v4u32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vssrln_wu_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vssrln_wu_d((v2u64)_1, (v2u64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsrlrn_b_h(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsrlrn_b_h((v8i16)_1, (v8i16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsrlrn_h_w(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsrlrn_h_w((v4i32)_1, (v4i32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vsrlrn_w_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vsrlrn_w_d((v2i64)_1, (v2i64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vssrlrn_bu_h(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vssrlrn_bu_h((v8u16)_1, (v8u16)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vssrlrn_hu_w(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vssrlrn_hu_w((v4u32)_1, (v4u32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vssrlrn_wu_d(__m128i _1, __m128i _2) {
+  return (__m128i)__builtin_lsx_vssrlrn_wu_d((v2u64)_1, (v2u64)_2);
+}
+
+#define __lsx_vfrstpi_b(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \
+  ((__m128i)__builtin_lsx_vfrstpi_b((v16i8)(_1), (v16i8)(_2), (_3)))
+
+#define __lsx_vfrstpi_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \
+  ((__m128i)__builtin_lsx_vfrstpi_h((v8i16)(_1), (v8i16)(_2), (_3)))
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vfrstp_b(__m128i _1, __m128i _2, __m128i _3) {
+  return (__m128i)__builtin_lsx_vfrstp_b((v16i8)_1, (v16i8)_2, (v16i8)_3);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vfrstp_h(__m128i _1, __m128i _2, __m128i _3) {
+  return (__m128i)__builtin_lsx_vfrstp_h((v8i16)_1, (v8i16)_2, (v8i16)_3);
+}
+
+#define __lsx_vshuf4i_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \
+  ((__m128i)__builtin_lsx_vshuf4i_d((v2i64)(_1), (v2i64)(_2), (_3)))
+
+#define __lsx_vbsrl_v(/*__m128i*/ _1, /*ui5*/ _2) \
+  ((__m128i)__builtin_lsx_vbsrl_v((v16i8)(_1), (_2)))
+
+#define __lsx_vbsll_v(/*__m128i*/ _1, /*ui5*/ _2) \
+  ((__m128i)__builtin_lsx_vbsll_v((v16i8)(_1), (_2)))
+
+#define __lsx_vextrins_b(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \
+  ((__m128i)__builtin_lsx_vextrins_b((v16i8)(_1), (v16i8)(_2), (_3)))
+
+#define __lsx_vextrins_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \
+  ((__m128i)__builtin_lsx_vextrins_h((v8i16)(_1), (v8i16)(_2), (_3)))
+
+#define __lsx_vextrins_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \
+  ((__m128i)__builtin_lsx_vextrins_w((v4i32)(_1), (v4i32)(_2), (_3)))
+
+#define __lsx_vextrins_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \
+  ((__m128i)__builtin_lsx_vextrins_d((v2i64)(_1), (v2i64)(_2), (_3)))
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmskltz_b(__m128i _1) {
+  return (__m128i)__builtin_lsx_vmskltz_b((v16i8)_1);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vmskltz_h(__m128i _1) {
+  return (__m128i)__builtin_lsx_vmskltz_h((v8i16)_1);
+}
+
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmskltz_w(__m128i _1) { + return (__m128i)__builtin_lsx_vmskltz_w((v4i32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmskltz_d(__m128i _1) { + return (__m128i)__builtin_lsx_vmskltz_d((v2i64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsigncov_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsigncov_b((v16i8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsigncov_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsigncov_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsigncov_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsigncov_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsigncov_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsigncov_d((v2i64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfmadd_s(__m128 _1, __m128 _2, __m128 _3) { + return (__m128)__builtin_lsx_vfmadd_s((v4f32)_1, (v4f32)_2, (v4f32)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vfmadd_d(__m128d _1, __m128d _2, __m128d _3) { + return (__m128d)__builtin_lsx_vfmadd_d((v2f64)_1, (v2f64)_2, (v2f64)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfmsub_s(__m128 _1, __m128 _2, __m128 _3) { + return (__m128)__builtin_lsx_vfmsub_s((v4f32)_1, (v4f32)_2, (v4f32)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vfmsub_d(__m128d _1, __m128d _2, __m128d _3) { + return (__m128d)__builtin_lsx_vfmsub_d((v2f64)_1, (v2f64)_2, (v2f64)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfnmadd_s(__m128 _1, __m128 _2, __m128 _3) { + return (__m128)__builtin_lsx_vfnmadd_s((v4f32)_1, (v4f32)_2, (v4f32)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vfnmadd_d(__m128d _1, __m128d _2, __m128d _3) { + return (__m128d)__builtin_lsx_vfnmadd_d((v2f64)_1, (v2f64)_2, (v2f64)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfnmsub_s(__m128 _1, __m128 _2, __m128 _3) { + return (__m128)__builtin_lsx_vfnmsub_s((v4f32)_1, (v4f32)_2, (v4f32)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vfnmsub_d(__m128d _1, __m128d _2, __m128d _3) { + return (__m128d)__builtin_lsx_vfnmsub_d((v2f64)_1, (v2f64)_2, (v2f64)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrne_w_s(__m128 _1) { + return (__m128i)__builtin_lsx_vftintrne_w_s((v4f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrne_l_d(__m128d _1) { + return (__m128i)__builtin_lsx_vftintrne_l_d((v2f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrp_w_s(__m128 _1) { + return 
(__m128i)__builtin_lsx_vftintrp_w_s((v4f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrp_l_d(__m128d _1) { + return (__m128i)__builtin_lsx_vftintrp_l_d((v2f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrm_w_s(__m128 _1) { + return (__m128i)__builtin_lsx_vftintrm_w_s((v4f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrm_l_d(__m128d _1) { + return (__m128i)__builtin_lsx_vftintrm_l_d((v2f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftint_w_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vftint_w_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vffint_s_l(__m128i _1, __m128i _2) { + return (__m128)__builtin_lsx_vffint_s_l((v2i64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrz_w_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vftintrz_w_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrp_w_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vftintrp_w_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrm_w_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vftintrm_w_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrne_w_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vftintrne_w_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintl_l_s(__m128 _1) { + return (__m128i)__builtin_lsx_vftintl_l_s((v4f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftinth_l_s(__m128 _1) { + return (__m128i)__builtin_lsx_vftinth_l_s((v4f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vffinth_d_w(__m128i _1) { + return (__m128d)__builtin_lsx_vffinth_d_w((v4i32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vffintl_d_w(__m128i _1) { + return (__m128d)__builtin_lsx_vffintl_d_w((v4i32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrzl_l_s(__m128 _1) { + return (__m128i)__builtin_lsx_vftintrzl_l_s((v4f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrzh_l_s(__m128 _1) { + return (__m128i)__builtin_lsx_vftintrzh_l_s((v4f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrpl_l_s(__m128 _1) { + return (__m128i)__builtin_lsx_vftintrpl_l_s((v4f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrph_l_s(__m128 _1) { + return (__m128i)__builtin_lsx_vftintrph_l_s((v4f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrml_l_s(__m128 _1) { + 
return (__m128i)__builtin_lsx_vftintrml_l_s((v4f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrmh_l_s(__m128 _1) { + return (__m128i)__builtin_lsx_vftintrmh_l_s((v4f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrnel_l_s(__m128 _1) { + return (__m128i)__builtin_lsx_vftintrnel_l_s((v4f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vftintrneh_l_s(__m128 _1) { + return (__m128i)__builtin_lsx_vftintrneh_l_s((v4f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfrintrne_s(__m128 _1) { + return (__m128)__builtin_lsx_vfrintrne_s((v4f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vfrintrne_d(__m128d _1) { + return (__m128d)__builtin_lsx_vfrintrne_d((v2f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfrintrz_s(__m128 _1) { + return (__m128)__builtin_lsx_vfrintrz_s((v4f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vfrintrz_d(__m128d _1) { + return (__m128d)__builtin_lsx_vfrintrz_d((v2f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfrintrp_s(__m128 _1) { + return (__m128)__builtin_lsx_vfrintrp_s((v4f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vfrintrp_d(__m128d _1) { + return (__m128d)__builtin_lsx_vfrintrp_d((v2f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfrintrm_s(__m128 _1) { + return (__m128)__builtin_lsx_vfrintrm_s((v4f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vfrintrm_d(__m128d _1) { + return (__m128d)__builtin_lsx_vfrintrm_d((v2f64)_1); +} + +#define __lsx_vstelm_b(/*__m128i*/ _1, /*void **/ _2, /*si8*/ _3, /*idx*/ _4) \ + ((void)__builtin_lsx_vstelm_b((v16i8)(_1), (void *)(_2), (_3), (_4))) + +#define __lsx_vstelm_h(/*__m128i*/ _1, /*void **/ _2, /*si8*/ _3, /*idx*/ _4) \ + ((void)__builtin_lsx_vstelm_h((v8i16)(_1), (void *)(_2), (_3), (_4))) + +#define __lsx_vstelm_w(/*__m128i*/ _1, /*void **/ _2, /*si8*/ _3, /*idx*/ _4) \ + ((void)__builtin_lsx_vstelm_w((v4i32)(_1), (void *)(_2), (_3), (_4))) + +#define __lsx_vstelm_d(/*__m128i*/ _1, /*void **/ _2, /*si8*/ _3, /*idx*/ _4) \ + ((void)__builtin_lsx_vstelm_d((v2i64)(_1), (void *)(_2), (_3), (_4))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwev_d_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwev_d_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwev_w_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwev_w_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwev_h_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwev_h_b((v16i8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwod_d_w(__m128i _1, __m128i _2) { + return 
(__m128i)__builtin_lsx_vaddwod_d_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwod_w_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwod_w_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwod_h_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwod_h_b((v16i8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwev_d_wu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwev_d_wu((v4u32)_1, (v4u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwev_w_hu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwev_w_hu((v8u16)_1, (v8u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwev_h_bu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwev_h_bu((v16u8)_1, (v16u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwod_d_wu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwod_d_wu((v4u32)_1, (v4u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwod_w_hu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwod_w_hu((v8u16)_1, (v8u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwod_h_bu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwod_h_bu((v16u8)_1, (v16u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwev_d_wu_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwev_d_wu_w((v4u32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwev_w_hu_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwev_w_hu_h((v8u16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwev_h_bu_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwev_h_bu_b((v16u8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwod_d_wu_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwod_d_wu_w((v4u32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwod_w_hu_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwod_w_hu_h((v8u16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwod_h_bu_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwod_h_bu_b((v16u8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsubwev_d_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsubwev_d_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsubwev_w_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsubwev_w_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, 
__always_inline__, __artificial__)) __m128i + __lsx_vsubwev_h_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsubwev_h_b((v16i8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsubwod_d_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsubwod_d_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsubwod_w_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsubwod_w_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsubwod_h_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsubwod_h_b((v16i8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsubwev_d_wu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsubwev_d_wu((v4u32)_1, (v4u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsubwev_w_hu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsubwev_w_hu((v8u16)_1, (v8u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsubwev_h_bu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsubwev_h_bu((v16u8)_1, (v16u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsubwod_d_wu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsubwod_d_wu((v4u32)_1, (v4u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsubwod_w_hu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsubwod_w_hu((v8u16)_1, (v8u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsubwod_h_bu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsubwod_h_bu((v16u8)_1, (v16u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwev_q_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwev_q_d((v2i64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwod_q_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwod_q_d((v2i64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwev_q_du(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwev_q_du((v2u64)_1, (v2u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwod_q_du(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwod_q_du((v2u64)_1, (v2u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsubwev_q_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsubwev_q_d((v2i64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsubwod_q_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsubwod_q_d((v2i64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsubwev_q_du(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsubwev_q_du((v2u64)_1, 
(v2u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsubwod_q_du(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsubwod_q_du((v2u64)_1, (v2u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwev_q_du_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwev_q_du_d((v2u64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vaddwod_q_du_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vaddwod_q_du_d((v2u64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwev_d_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwev_d_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwev_w_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwev_w_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwev_h_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwev_h_b((v16i8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwod_d_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwod_d_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwod_w_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwod_w_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwod_h_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwod_h_b((v16i8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwev_d_wu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwev_d_wu((v4u32)_1, (v4u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwev_w_hu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwev_w_hu((v8u16)_1, (v8u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwev_h_bu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwev_h_bu((v16u8)_1, (v16u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwod_d_wu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwod_d_wu((v4u32)_1, (v4u32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwod_w_hu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwod_w_hu((v8u16)_1, (v8u16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwod_h_bu(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwod_h_bu((v16u8)_1, (v16u8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwev_d_wu_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwev_d_wu_w((v4u32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + 
__lsx_vmulwev_w_hu_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwev_w_hu_h((v8u16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwev_h_bu_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwev_h_bu_b((v16u8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwod_d_wu_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwod_d_wu_w((v4u32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwod_w_hu_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwod_w_hu_h((v8u16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwod_h_bu_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwod_h_bu_b((v16u8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwev_q_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwev_q_d((v2i64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwod_q_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwod_q_d((v2i64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwev_q_du(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwev_q_du((v2u64)_1, (v2u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwod_q_du(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwod_q_du((v2u64)_1, (v2u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwev_q_du_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwev_q_du_d((v2u64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmulwod_q_du_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vmulwod_q_du_d((v2u64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vhaddw_q_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vhaddw_q_d((v2i64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vhaddw_qu_du(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vhaddw_qu_du((v2u64)_1, (v2u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vhsubw_q_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vhsubw_q_d((v2i64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vhsubw_qu_du(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vhsubw_qu_du((v2u64)_1, (v2u64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwev_d_w(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwev_d_w((v2i64)_1, (v4i32)_2, (v4i32)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwev_w_h(__m128i _1, __m128i _2, __m128i _3) { + return 
(__m128i)__builtin_lsx_vmaddwev_w_h((v4i32)_1, (v8i16)_2, (v8i16)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwev_h_b(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwev_h_b((v8i16)_1, (v16i8)_2, (v16i8)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwev_d_wu(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwev_d_wu((v2u64)_1, (v4u32)_2, (v4u32)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwev_w_hu(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwev_w_hu((v4u32)_1, (v8u16)_2, (v8u16)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwev_h_bu(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwev_h_bu((v8u16)_1, (v16u8)_2, (v16u8)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwod_d_w(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwod_d_w((v2i64)_1, (v4i32)_2, (v4i32)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwod_w_h(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwod_w_h((v4i32)_1, (v8i16)_2, (v8i16)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwod_h_b(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwod_h_b((v8i16)_1, (v16i8)_2, (v16i8)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwod_d_wu(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwod_d_wu((v2u64)_1, (v4u32)_2, (v4u32)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwod_w_hu(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwod_w_hu((v4u32)_1, (v8u16)_2, (v8u16)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwod_h_bu(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwod_h_bu((v8u16)_1, (v16u8)_2, (v16u8)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwev_d_wu_w(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwev_d_wu_w((v2i64)_1, (v4u32)_2, + (v4i32)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwev_w_hu_h(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwev_w_hu_h((v4i32)_1, (v8u16)_2, + (v8i16)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwev_h_bu_b(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwev_h_bu_b((v8i16)_1, (v16u8)_2, + (v16i8)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwod_d_wu_w(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwod_d_wu_w((v2i64)_1, (v4u32)_2, + (v4i32)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + 
__lsx_vmaddwod_w_hu_h(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwod_w_hu_h((v4i32)_1, (v8u16)_2, + (v8i16)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwod_h_bu_b(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwod_h_bu_b((v8i16)_1, (v16u8)_2, + (v16i8)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwev_q_d(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwev_q_d((v2i64)_1, (v2i64)_2, (v2i64)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwod_q_d(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwod_q_d((v2i64)_1, (v2i64)_2, (v2i64)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwev_q_du(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwev_q_du((v2u64)_1, (v2u64)_2, (v2u64)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwod_q_du(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwod_q_du((v2u64)_1, (v2u64)_2, (v2u64)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwev_q_du_d(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwev_q_du_d((v2i64)_1, (v2u64)_2, + (v2i64)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmaddwod_q_du_d(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vmaddwod_q_du_d((v2i64)_1, (v2u64)_2, + (v2i64)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vrotr_b(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vrotr_b((v16i8)_1, (v16i8)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vrotr_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vrotr_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vrotr_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vrotr_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vrotr_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vrotr_d((v2i64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vadd_q(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vadd_q((v2i64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vsub_q(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vsub_q((v2i64)_1, (v2i64)_2); +} + +#define __lsx_vldrepl_b(/*void **/ _1, /*si12*/ _2) \ + ((__m128i)__builtin_lsx_vldrepl_b((void const *)(_1), (_2))) + +#define __lsx_vldrepl_h(/*void **/ _1, /*si11*/ _2) \ + ((__m128i)__builtin_lsx_vldrepl_h((void const *)(_1), (_2))) + +#define __lsx_vldrepl_w(/*void **/ _1, /*si10*/ _2) \ + ((__m128i)__builtin_lsx_vldrepl_w((void const *)(_1), (_2))) + +#define __lsx_vldrepl_d(/*void **/ _1, /*si9*/ _2) \ + ((__m128i)__builtin_lsx_vldrepl_d((void const *)(_1), (_2))) + +extern __inline + 
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmskgez_b(__m128i _1) { + return (__m128i)__builtin_lsx_vmskgez_b((v16i8)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vmsknz_b(__m128i _1) { + return (__m128i)__builtin_lsx_vmsknz_b((v16i8)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vexth_h_b(__m128i _1) { + return (__m128i)__builtin_lsx_vexth_h_b((v16i8)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vexth_w_h(__m128i _1) { + return (__m128i)__builtin_lsx_vexth_w_h((v8i16)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vexth_d_w(__m128i _1) { + return (__m128i)__builtin_lsx_vexth_d_w((v4i32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vexth_q_d(__m128i _1) { + return (__m128i)__builtin_lsx_vexth_q_d((v2i64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vexth_hu_bu(__m128i _1) { + return (__m128i)__builtin_lsx_vexth_hu_bu((v16u8)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vexth_wu_hu(__m128i _1) { + return (__m128i)__builtin_lsx_vexth_wu_hu((v8u16)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vexth_du_wu(__m128i _1) { + return (__m128i)__builtin_lsx_vexth_du_wu((v4u32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vexth_qu_du(__m128i _1) { + return (__m128i)__builtin_lsx_vexth_qu_du((v2u64)_1); +} + +#define __lsx_vrotri_b(/*__m128i*/ _1, /*ui3*/ _2) \ + ((__m128i)__builtin_lsx_vrotri_b((v16i8)(_1), (_2))) + +#define __lsx_vrotri_h(/*__m128i*/ _1, /*ui4*/ _2) \ + ((__m128i)__builtin_lsx_vrotri_h((v8i16)(_1), (_2))) + +#define __lsx_vrotri_w(/*__m128i*/ _1, /*ui5*/ _2) \ + ((__m128i)__builtin_lsx_vrotri_w((v4i32)(_1), (_2))) + +#define __lsx_vrotri_d(/*__m128i*/ _1, /*ui6*/ _2) \ + ((__m128i)__builtin_lsx_vrotri_d((v2i64)(_1), (_2))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vextl_q_d(__m128i _1) { + return (__m128i)__builtin_lsx_vextl_q_d((v2i64)_1); +} + +#define __lsx_vsrlni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ + ((__m128i)__builtin_lsx_vsrlni_b_h((v16i8)(_1), (v16i8)(_2), (_3))) + +#define __lsx_vsrlni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ + ((__m128i)__builtin_lsx_vsrlni_h_w((v8i16)(_1), (v8i16)(_2), (_3))) + +#define __lsx_vsrlni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ + ((__m128i)__builtin_lsx_vsrlni_w_d((v4i32)(_1), (v4i32)(_2), (_3))) + +#define __lsx_vsrlni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ + ((__m128i)__builtin_lsx_vsrlni_d_q((v2i64)(_1), (v2i64)(_2), (_3))) + +#define __lsx_vsrlrni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ + ((__m128i)__builtin_lsx_vsrlrni_b_h((v16i8)(_1), (v16i8)(_2), (_3))) + +#define __lsx_vsrlrni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ + ((__m128i)__builtin_lsx_vsrlrni_h_w((v8i16)(_1), (v8i16)(_2), (_3))) + +#define __lsx_vsrlrni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ + ((__m128i)__builtin_lsx_vsrlrni_w_d((v4i32)(_1), (v4i32)(_2), (_3))) + +#define __lsx_vsrlrni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ 
_3) \ + ((__m128i)__builtin_lsx_vsrlrni_d_q((v2i64)(_1), (v2i64)(_2), (_3))) + +#define __lsx_vssrlni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ + ((__m128i)__builtin_lsx_vssrlni_b_h((v16i8)(_1), (v16i8)(_2), (_3))) + +#define __lsx_vssrlni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ + ((__m128i)__builtin_lsx_vssrlni_h_w((v8i16)(_1), (v8i16)(_2), (_3))) + +#define __lsx_vssrlni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ + ((__m128i)__builtin_lsx_vssrlni_w_d((v4i32)(_1), (v4i32)(_2), (_3))) + +#define __lsx_vssrlni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ + ((__m128i)__builtin_lsx_vssrlni_d_q((v2i64)(_1), (v2i64)(_2), (_3))) + +#define __lsx_vssrlni_bu_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ + ((__m128i)__builtin_lsx_vssrlni_bu_h((v16u8)(_1), (v16i8)(_2), (_3))) + +#define __lsx_vssrlni_hu_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ + ((__m128i)__builtin_lsx_vssrlni_hu_w((v8u16)(_1), (v8i16)(_2), (_3))) + +#define __lsx_vssrlni_wu_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ + ((__m128i)__builtin_lsx_vssrlni_wu_d((v4u32)(_1), (v4i32)(_2), (_3))) + +#define __lsx_vssrlni_du_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ + ((__m128i)__builtin_lsx_vssrlni_du_q((v2u64)(_1), (v2i64)(_2), (_3))) + +#define __lsx_vssrlrni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ + ((__m128i)__builtin_lsx_vssrlrni_b_h((v16i8)(_1), (v16i8)(_2), (_3))) + +#define __lsx_vssrlrni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ + ((__m128i)__builtin_lsx_vssrlrni_h_w((v8i16)(_1), (v8i16)(_2), (_3))) + +#define __lsx_vssrlrni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ + ((__m128i)__builtin_lsx_vssrlrni_w_d((v4i32)(_1), (v4i32)(_2), (_3))) + +#define __lsx_vssrlrni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ + ((__m128i)__builtin_lsx_vssrlrni_d_q((v2i64)(_1), (v2i64)(_2), (_3))) + +#define __lsx_vssrlrni_bu_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ + ((__m128i)__builtin_lsx_vssrlrni_bu_h((v16u8)(_1), (v16i8)(_2), (_3))) + +#define __lsx_vssrlrni_hu_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ + ((__m128i)__builtin_lsx_vssrlrni_hu_w((v8u16)(_1), (v8i16)(_2), (_3))) + +#define __lsx_vssrlrni_wu_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ + ((__m128i)__builtin_lsx_vssrlrni_wu_d((v4u32)(_1), (v4i32)(_2), (_3))) + +#define __lsx_vssrlrni_du_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ + ((__m128i)__builtin_lsx_vssrlrni_du_q((v2u64)(_1), (v2i64)(_2), (_3))) + +#define __lsx_vsrani_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ + ((__m128i)__builtin_lsx_vsrani_b_h((v16i8)(_1), (v16i8)(_2), (_3))) + +#define __lsx_vsrani_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ + ((__m128i)__builtin_lsx_vsrani_h_w((v8i16)(_1), (v8i16)(_2), (_3))) + +#define __lsx_vsrani_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ + ((__m128i)__builtin_lsx_vsrani_w_d((v4i32)(_1), (v4i32)(_2), (_3))) + +#define __lsx_vsrani_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ + ((__m128i)__builtin_lsx_vsrani_d_q((v2i64)(_1), (v2i64)(_2), (_3))) + +#define __lsx_vsrarni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ + ((__m128i)__builtin_lsx_vsrarni_b_h((v16i8)(_1), (v16i8)(_2), (_3))) + +#define __lsx_vsrarni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ + ((__m128i)__builtin_lsx_vsrarni_h_w((v8i16)(_1), (v8i16)(_2), (_3))) + +#define __lsx_vsrarni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ + ((__m128i)__builtin_lsx_vsrarni_w_d((v4i32)(_1), (v4i32)(_2), (_3))) + +#define __lsx_vsrarni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ + 
((__m128i)__builtin_lsx_vsrarni_d_q((v2i64)(_1), (v2i64)(_2), (_3))) + +#define __lsx_vssrani_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ + ((__m128i)__builtin_lsx_vssrani_b_h((v16i8)(_1), (v16i8)(_2), (_3))) + +#define __lsx_vssrani_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ + ((__m128i)__builtin_lsx_vssrani_h_w((v8i16)(_1), (v8i16)(_2), (_3))) + +#define __lsx_vssrani_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ + ((__m128i)__builtin_lsx_vssrani_w_d((v4i32)(_1), (v4i32)(_2), (_3))) + +#define __lsx_vssrani_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ + ((__m128i)__builtin_lsx_vssrani_d_q((v2i64)(_1), (v2i64)(_2), (_3))) + +#define __lsx_vssrani_bu_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ + ((__m128i)__builtin_lsx_vssrani_bu_h((v16u8)(_1), (v16i8)(_2), (_3))) + +#define __lsx_vssrani_hu_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ + ((__m128i)__builtin_lsx_vssrani_hu_w((v8u16)(_1), (v8i16)(_2), (_3))) + +#define __lsx_vssrani_wu_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ + ((__m128i)__builtin_lsx_vssrani_wu_d((v4u32)(_1), (v4i32)(_2), (_3))) + +#define __lsx_vssrani_du_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ + ((__m128i)__builtin_lsx_vssrani_du_q((v2u64)(_1), (v2i64)(_2), (_3))) + +#define __lsx_vssrarni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ + ((__m128i)__builtin_lsx_vssrarni_b_h((v16i8)(_1), (v16i8)(_2), (_3))) + +#define __lsx_vssrarni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ + ((__m128i)__builtin_lsx_vssrarni_h_w((v8i16)(_1), (v8i16)(_2), (_3))) + +#define __lsx_vssrarni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ + ((__m128i)__builtin_lsx_vssrarni_w_d((v4i32)(_1), (v4i32)(_2), (_3))) + +#define __lsx_vssrarni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ + ((__m128i)__builtin_lsx_vssrarni_d_q((v2i64)(_1), (v2i64)(_2), (_3))) + +#define __lsx_vssrarni_bu_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ + ((__m128i)__builtin_lsx_vssrarni_bu_h((v16u8)(_1), (v16i8)(_2), (_3))) + +#define __lsx_vssrarni_hu_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ + ((__m128i)__builtin_lsx_vssrarni_hu_w((v8u16)(_1), (v8i16)(_2), (_3))) + +#define __lsx_vssrarni_wu_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ + ((__m128i)__builtin_lsx_vssrarni_wu_d((v4u32)(_1), (v4i32)(_2), (_3))) + +#define __lsx_vssrarni_du_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ + ((__m128i)__builtin_lsx_vssrarni_du_q((v2u64)(_1), (v2i64)(_2), (_3))) + +#define __lsx_vpermi_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ + ((__m128i)__builtin_lsx_vpermi_w((v4i32)(_1), (v4i32)(_2), (_3))) + +#define __lsx_vld(/*void **/ _1, /*si12*/ _2) \ + ((__m128i)__builtin_lsx_vld((void const *)(_1), (_2))) + +#define __lsx_vst(/*__m128i*/ _1, /*void **/ _2, /*si12*/ _3) \ + ((void)__builtin_lsx_vst((v16i8)(_1), (void *)(_2), (_3))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssrlrn_b_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssrlrn_b_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssrlrn_h_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssrlrn_h_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssrlrn_w_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssrlrn_w_d((v2i64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + 
__lsx_vssrln_b_h(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssrln_b_h((v8i16)_1, (v8i16)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssrln_h_w(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssrln_h_w((v4i32)_1, (v4i32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vssrln_w_d(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vssrln_w_d((v2i64)_1, (v2i64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vorn_v(__m128i _1, __m128i _2) { + return (__m128i)__builtin_lsx_vorn_v((v16i8)_1, (v16i8)_2); +} + +#define __lsx_vldi(/*i13*/ _1) ((__m128i)__builtin_lsx_vldi((_1))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vshuf_b(__m128i _1, __m128i _2, __m128i _3) { + return (__m128i)__builtin_lsx_vshuf_b((v16i8)_1, (v16i8)_2, (v16i8)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vldx(void const *_1, long int _2) { + return (__m128i)__builtin_lsx_vldx((void const *)_1, (long int)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) void + __lsx_vstx(__m128i _1, void *_2, long int _3) { + return (void)__builtin_lsx_vstx((v16i8)_1, (void *)_2, (long int)_3); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vextl_qu_du(__m128i _1) { + return (__m128i)__builtin_lsx_vextl_qu_du((v2u64)_1); +} + +#define __lsx_bnz_b(/*__m128i*/ _1) ((int)__builtin_lsx_bnz_b((v16u8)(_1))) + +#define __lsx_bnz_d(/*__m128i*/ _1) ((int)__builtin_lsx_bnz_d((v2u64)(_1))) + +#define __lsx_bnz_h(/*__m128i*/ _1) ((int)__builtin_lsx_bnz_h((v8u16)(_1))) + +#define __lsx_bnz_v(/*__m128i*/ _1) ((int)__builtin_lsx_bnz_v((v16u8)(_1))) + +#define __lsx_bnz_w(/*__m128i*/ _1) ((int)__builtin_lsx_bnz_w((v4u32)(_1))) + +#define __lsx_bz_b(/*__m128i*/ _1) ((int)__builtin_lsx_bz_b((v16u8)(_1))) + +#define __lsx_bz_d(/*__m128i*/ _1) ((int)__builtin_lsx_bz_d((v2u64)(_1))) + +#define __lsx_bz_h(/*__m128i*/ _1) ((int)__builtin_lsx_bz_h((v8u16)(_1))) + +#define __lsx_bz_v(/*__m128i*/ _1) ((int)__builtin_lsx_bz_v((v16u8)(_1))) + +#define __lsx_bz_w(/*__m128i*/ _1) ((int)__builtin_lsx_bz_w((v4u32)(_1))) + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_caf_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_caf_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_caf_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_caf_s((v4f32)_1, (v4f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_ceq_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_ceq_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_ceq_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_ceq_s((v4f32)_1, (v4f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_cle_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_cle_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, 
__always_inline__, __artificial__)) __m128i + __lsx_vfcmp_cle_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_cle_s((v4f32)_1, (v4f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_clt_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_clt_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_clt_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_clt_s((v4f32)_1, (v4f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_cne_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_cne_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_cne_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_cne_s((v4f32)_1, (v4f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_cor_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_cor_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_cor_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_cor_s((v4f32)_1, (v4f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_cueq_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_cueq_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_cueq_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_cueq_s((v4f32)_1, (v4f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_cule_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_cule_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_cule_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_cule_s((v4f32)_1, (v4f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_cult_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_cult_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_cult_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_cult_s((v4f32)_1, (v4f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_cun_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_cun_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_cune_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_cune_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_cune_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_cune_s((v4f32)_1, (v4f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_cun_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_cun_s((v4f32)_1, (v4f32)_2); +} + 
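/* [Editor's illustrative sketch; not part of the patch.] The vfcmp_*
 * wrappers above return a per-lane all-ones/all-zeros mask in an __m128i,
 * which pairs naturally with the __lsx_bnz_* / __lsx_bz_* tests defined
 * earlier in this header. A minimal usage example, assuming the translation
 * unit is compiled with -mlsx; the helper name is ours:
 *
 *   static inline int any_lane_lt(__m128 a, __m128 b) {
 *     __m128i m = __lsx_vfcmp_clt_s(a, b); // all-ones lanes where a < b
 *     return __lsx_bnz_v(m);               // 1 if any bit of m is set
 *   }
 */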
+extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_saf_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_saf_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_saf_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_saf_s((v4f32)_1, (v4f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_seq_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_seq_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_seq_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_seq_s((v4f32)_1, (v4f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_sle_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_sle_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_sle_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_sle_s((v4f32)_1, (v4f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_slt_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_slt_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_slt_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_slt_s((v4f32)_1, (v4f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_sne_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_sne_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_sne_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_sne_s((v4f32)_1, (v4f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_sor_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_sor_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_sor_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_sor_s((v4f32)_1, (v4f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_sueq_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_sueq_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_sueq_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_sueq_s((v4f32)_1, (v4f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_sule_d(__m128d _1, __m128d _2) { + return (__m128i)__builtin_lsx_vfcmp_sule_d((v2f64)_1, (v2f64)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_sule_s(__m128 _1, __m128 _2) { + return (__m128i)__builtin_lsx_vfcmp_sule_s((v4f32)_1, (v4f32)_2); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i + __lsx_vfcmp_sult_d(__m128d _1, __m128d _2) { + return 
(__m128i)__builtin_lsx_vfcmp_sult_d((v2f64)_1, (v2f64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vfcmp_sult_s(__m128 _1, __m128 _2) {
+  return (__m128i)__builtin_lsx_vfcmp_sult_s((v4f32)_1, (v4f32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vfcmp_sun_d(__m128d _1, __m128d _2) {
+  return (__m128i)__builtin_lsx_vfcmp_sun_d((v2f64)_1, (v2f64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vfcmp_sune_d(__m128d _1, __m128d _2) {
+  return (__m128i)__builtin_lsx_vfcmp_sune_d((v2f64)_1, (v2f64)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vfcmp_sune_s(__m128 _1, __m128 _2) {
+  return (__m128i)__builtin_lsx_vfcmp_sune_s((v4f32)_1, (v4f32)_2);
+}
+
+extern __inline
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i
+    __lsx_vfcmp_sun_s(__m128 _1, __m128 _2) {
+  return (__m128i)__builtin_lsx_vfcmp_sun_s((v4f32)_1, (v4f32)_2);
+}
+
+#define __lsx_vrepli_b(/*si10*/ _1) ((__m128i)__builtin_lsx_vrepli_b((_1)))
+
+#define __lsx_vrepli_d(/*si10*/ _1) ((__m128i)__builtin_lsx_vrepli_d((_1)))
+
+#define __lsx_vrepli_h(/*si10*/ _1) ((__m128i)__builtin_lsx_vrepli_h((_1)))
+
+#define __lsx_vrepli_w(/*si10*/ _1) ((__m128i)__builtin_lsx_vrepli_w((_1)))
+
+#endif /* defined(__loongarch_sx) */
+#endif /* _LOONGSON_SXINTRIN_H */
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 0d281d410c1975429181566b8f826fb5a9f0aa7e..935acb201a989c5320f3cb82319765a4adc9f861 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -3638,40 +3638,14 @@ bool Sema::CheckLoongArchBuiltinFunctionCall(const TargetInfo &TI,
   switch (BuiltinID) {
   default:
     break;
+  // Basic intrinsics.
   case LoongArch::BI__builtin_loongarch_cacop_d:
-    if (!TI.hasFeature("64bit"))
-      return Diag(TheCall->getBeginLoc(),
-                  diag::err_loongarch_builtin_requires_la64)
-             << TheCall->getSourceRange();
-    LLVM_FALLTHROUGH;
   case LoongArch::BI__builtin_loongarch_cacop_w: {
-    if (BuiltinID == LoongArch::BI__builtin_loongarch_cacop_w &&
-        !TI.hasFeature("32bit"))
-      return Diag(TheCall->getBeginLoc(),
-                  diag::err_loongarch_builtin_requires_la32)
-             << TheCall->getSourceRange();
     SemaBuiltinConstantArgRange(TheCall, 0, 0, llvm::maxUIntN(5));
     SemaBuiltinConstantArgRange(TheCall, 2, llvm::minIntN(12),
                                 llvm::maxIntN(12));
     break;
   }
-  case LoongArch::BI__builtin_loongarch_crc_w_b_w:
-  case LoongArch::BI__builtin_loongarch_crc_w_h_w:
-  case LoongArch::BI__builtin_loongarch_crc_w_w_w:
-  case LoongArch::BI__builtin_loongarch_crc_w_d_w:
-  case LoongArch::BI__builtin_loongarch_crcc_w_b_w:
-  case LoongArch::BI__builtin_loongarch_crcc_w_h_w:
-  case LoongArch::BI__builtin_loongarch_crcc_w_w_w:
-  case LoongArch::BI__builtin_loongarch_crcc_w_d_w:
-  case LoongArch::BI__builtin_loongarch_iocsrrd_d:
-  case LoongArch::BI__builtin_loongarch_iocsrwr_d:
-  case LoongArch::BI__builtin_loongarch_asrtle_d:
-  case LoongArch::BI__builtin_loongarch_asrtgt_d:
-    if (!TI.hasFeature("64bit"))
-      return Diag(TheCall->getBeginLoc(),
-                  diag::err_loongarch_builtin_requires_la64)
-             << TheCall->getSourceRange();
-    break;
   case LoongArch::BI__builtin_loongarch_break:
   case LoongArch::BI__builtin_loongarch_dbar:
   case LoongArch::BI__builtin_loongarch_ibar:
@@ -3679,41 +3653,475 @@ bool Sema::CheckLoongArchBuiltinFunctionCall(const TargetInfo &TI,
     // Check if immediate is in [0, 32767].
     return SemaBuiltinConstantArgRange(TheCall, 0, 0, 32767);
   case LoongArch::BI__builtin_loongarch_csrrd_w:
-    return SemaBuiltinConstantArgRange(TheCall, 0, 0, 16383);
-  case LoongArch::BI__builtin_loongarch_csrwr_w:
-    return SemaBuiltinConstantArgRange(TheCall, 1, 0, 16383);
-  case LoongArch::BI__builtin_loongarch_csrxchg_w:
-    return SemaBuiltinConstantArgRange(TheCall, 2, 0, 16383);
   case LoongArch::BI__builtin_loongarch_csrrd_d:
-    if (!TI.hasFeature("64bit"))
-      return Diag(TheCall->getBeginLoc(),
-                  diag::err_loongarch_builtin_requires_la64)
-             << TheCall->getSourceRange();
     return SemaBuiltinConstantArgRange(TheCall, 0, 0, 16383);
+  case LoongArch::BI__builtin_loongarch_csrwr_w:
   case LoongArch::BI__builtin_loongarch_csrwr_d:
-    if (!TI.hasFeature("64bit"))
-      return Diag(TheCall->getBeginLoc(),
-                  diag::err_loongarch_builtin_requires_la64)
-             << TheCall->getSourceRange();
     return SemaBuiltinConstantArgRange(TheCall, 1, 0, 16383);
+  case LoongArch::BI__builtin_loongarch_csrxchg_w:
   case LoongArch::BI__builtin_loongarch_csrxchg_d:
-    if (!TI.hasFeature("64bit"))
-      return Diag(TheCall->getBeginLoc(),
-                  diag::err_loongarch_builtin_requires_la64)
-             << TheCall->getSourceRange();
     return SemaBuiltinConstantArgRange(TheCall, 2, 0, 16383);
   case LoongArch::BI__builtin_loongarch_lddir_d:
   case LoongArch::BI__builtin_loongarch_ldpte_d:
-    if (!TI.hasFeature("64bit"))
-      return Diag(TheCall->getBeginLoc(),
-                  diag::err_loongarch_builtin_requires_la64)
-             << TheCall->getSourceRange();
     return SemaBuiltinConstantArgRange(TheCall, 1, 0, 31);
   case LoongArch::BI__builtin_loongarch_movfcsr2gr:
   case LoongArch::BI__builtin_loongarch_movgr2fcsr:
     return SemaBuiltinConstantArgRange(TheCall, 0, 0, llvm::maxUIntN(2));
-  }
+  // LSX intrinsics.
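// [Editor's illustrative note; not part of the patch.] Each group of cases
// below clamps the constant operand to the immediate width encoded in the
// instruction (ui3 -> [0, 7], ui4 -> [0, 15], and so on). For example, for
// the ui3 shift amount of vslli_b, a sketch of what Sema would accept and
// reject (variable names and values are illustrative):
//
//   __m128i ok  = __lsx_vslli_b(v, 7);  // in range, accepted
//   __m128i bad = __lsx_vslli_b(v, 8);  // out of range, constant-range error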
+ case LoongArch::BI__builtin_lsx_vbitclri_b: + case LoongArch::BI__builtin_lsx_vbitrevi_b: + case LoongArch::BI__builtin_lsx_vbitseti_b: + case LoongArch::BI__builtin_lsx_vsat_b: + case LoongArch::BI__builtin_lsx_vsat_bu: + case LoongArch::BI__builtin_lsx_vslli_b: + case LoongArch::BI__builtin_lsx_vsrai_b: + case LoongArch::BI__builtin_lsx_vsrari_b: + case LoongArch::BI__builtin_lsx_vsrli_b: + case LoongArch::BI__builtin_lsx_vsllwil_h_b: + case LoongArch::BI__builtin_lsx_vsllwil_hu_bu: + case LoongArch::BI__builtin_lsx_vrotri_b: + case LoongArch::BI__builtin_lsx_vsrlri_b: + return SemaBuiltinConstantArgRange(TheCall, 1, 0, 7); + case LoongArch::BI__builtin_lsx_vbitclri_h: + case LoongArch::BI__builtin_lsx_vbitrevi_h: + case LoongArch::BI__builtin_lsx_vbitseti_h: + case LoongArch::BI__builtin_lsx_vsat_h: + case LoongArch::BI__builtin_lsx_vsat_hu: + case LoongArch::BI__builtin_lsx_vslli_h: + case LoongArch::BI__builtin_lsx_vsrai_h: + case LoongArch::BI__builtin_lsx_vsrari_h: + case LoongArch::BI__builtin_lsx_vsrli_h: + case LoongArch::BI__builtin_lsx_vsllwil_w_h: + case LoongArch::BI__builtin_lsx_vsllwil_wu_hu: + case LoongArch::BI__builtin_lsx_vrotri_h: + case LoongArch::BI__builtin_lsx_vsrlri_h: + return SemaBuiltinConstantArgRange(TheCall, 1, 0, 15); + case LoongArch::BI__builtin_lsx_vssrarni_b_h: + case LoongArch::BI__builtin_lsx_vssrarni_bu_h: + case LoongArch::BI__builtin_lsx_vssrani_b_h: + case LoongArch::BI__builtin_lsx_vssrani_bu_h: + case LoongArch::BI__builtin_lsx_vsrarni_b_h: + case LoongArch::BI__builtin_lsx_vsrlni_b_h: + case LoongArch::BI__builtin_lsx_vsrlrni_b_h: + case LoongArch::BI__builtin_lsx_vssrlni_b_h: + case LoongArch::BI__builtin_lsx_vssrlni_bu_h: + case LoongArch::BI__builtin_lsx_vssrlrni_b_h: + case LoongArch::BI__builtin_lsx_vssrlrni_bu_h: + case LoongArch::BI__builtin_lsx_vsrani_b_h: + return SemaBuiltinConstantArgRange(TheCall, 2, 0, 15); + case LoongArch::BI__builtin_lsx_vslei_bu: + case LoongArch::BI__builtin_lsx_vslei_hu: + case LoongArch::BI__builtin_lsx_vslei_wu: + case LoongArch::BI__builtin_lsx_vslei_du: + case LoongArch::BI__builtin_lsx_vslti_bu: + case LoongArch::BI__builtin_lsx_vslti_hu: + case LoongArch::BI__builtin_lsx_vslti_wu: + case LoongArch::BI__builtin_lsx_vslti_du: + case LoongArch::BI__builtin_lsx_vmaxi_bu: + case LoongArch::BI__builtin_lsx_vmaxi_hu: + case LoongArch::BI__builtin_lsx_vmaxi_wu: + case LoongArch::BI__builtin_lsx_vmaxi_du: + case LoongArch::BI__builtin_lsx_vmini_bu: + case LoongArch::BI__builtin_lsx_vmini_hu: + case LoongArch::BI__builtin_lsx_vmini_wu: + case LoongArch::BI__builtin_lsx_vmini_du: + case LoongArch::BI__builtin_lsx_vaddi_bu: + case LoongArch::BI__builtin_lsx_vaddi_hu: + case LoongArch::BI__builtin_lsx_vaddi_wu: + case LoongArch::BI__builtin_lsx_vaddi_du: + case LoongArch::BI__builtin_lsx_vbitclri_w: + case LoongArch::BI__builtin_lsx_vbitrevi_w: + case LoongArch::BI__builtin_lsx_vbitseti_w: + case LoongArch::BI__builtin_lsx_vsat_w: + case LoongArch::BI__builtin_lsx_vsat_wu: + case LoongArch::BI__builtin_lsx_vslli_w: + case LoongArch::BI__builtin_lsx_vsrai_w: + case LoongArch::BI__builtin_lsx_vsrari_w: + case LoongArch::BI__builtin_lsx_vsrli_w: + case LoongArch::BI__builtin_lsx_vsllwil_d_w: + case LoongArch::BI__builtin_lsx_vsllwil_du_wu: + case LoongArch::BI__builtin_lsx_vsrlri_w: + case LoongArch::BI__builtin_lsx_vrotri_w: + case LoongArch::BI__builtin_lsx_vsubi_bu: + case LoongArch::BI__builtin_lsx_vsubi_hu: + case LoongArch::BI__builtin_lsx_vbsrl_v: + case LoongArch::BI__builtin_lsx_vbsll_v: + case 
LoongArch::BI__builtin_lsx_vsubi_wu: + case LoongArch::BI__builtin_lsx_vsubi_du: + return SemaBuiltinConstantArgRange(TheCall, 1, 0, 31); + case LoongArch::BI__builtin_lsx_vssrarni_h_w: + case LoongArch::BI__builtin_lsx_vssrarni_hu_w: + case LoongArch::BI__builtin_lsx_vssrani_h_w: + case LoongArch::BI__builtin_lsx_vssrani_hu_w: + case LoongArch::BI__builtin_lsx_vsrarni_h_w: + case LoongArch::BI__builtin_lsx_vsrani_h_w: + case LoongArch::BI__builtin_lsx_vfrstpi_b: + case LoongArch::BI__builtin_lsx_vfrstpi_h: + case LoongArch::BI__builtin_lsx_vsrlni_h_w: + case LoongArch::BI__builtin_lsx_vsrlrni_h_w: + case LoongArch::BI__builtin_lsx_vssrlni_h_w: + case LoongArch::BI__builtin_lsx_vssrlni_hu_w: + case LoongArch::BI__builtin_lsx_vssrlrni_h_w: + case LoongArch::BI__builtin_lsx_vssrlrni_hu_w: + return SemaBuiltinConstantArgRange(TheCall, 2, 0, 31); + case LoongArch::BI__builtin_lsx_vbitclri_d: + case LoongArch::BI__builtin_lsx_vbitrevi_d: + case LoongArch::BI__builtin_lsx_vbitseti_d: + case LoongArch::BI__builtin_lsx_vsat_d: + case LoongArch::BI__builtin_lsx_vsat_du: + case LoongArch::BI__builtin_lsx_vslli_d: + case LoongArch::BI__builtin_lsx_vsrai_d: + case LoongArch::BI__builtin_lsx_vsrli_d: + case LoongArch::BI__builtin_lsx_vsrari_d: + case LoongArch::BI__builtin_lsx_vrotri_d: + case LoongArch::BI__builtin_lsx_vsrlri_d: + return SemaBuiltinConstantArgRange(TheCall, 1, 0, 63); + case LoongArch::BI__builtin_lsx_vssrarni_w_d: + case LoongArch::BI__builtin_lsx_vssrarni_wu_d: + case LoongArch::BI__builtin_lsx_vssrani_w_d: + case LoongArch::BI__builtin_lsx_vssrani_wu_d: + case LoongArch::BI__builtin_lsx_vsrarni_w_d: + case LoongArch::BI__builtin_lsx_vsrlni_w_d: + case LoongArch::BI__builtin_lsx_vsrlrni_w_d: + case LoongArch::BI__builtin_lsx_vssrlni_w_d: + case LoongArch::BI__builtin_lsx_vssrlni_wu_d: + case LoongArch::BI__builtin_lsx_vssrlrni_w_d: + case LoongArch::BI__builtin_lsx_vssrlrni_wu_d: + case LoongArch::BI__builtin_lsx_vsrani_w_d: + return SemaBuiltinConstantArgRange(TheCall, 2, 0, 63); + case LoongArch::BI__builtin_lsx_vssrarni_d_q: + case LoongArch::BI__builtin_lsx_vssrarni_du_q: + case LoongArch::BI__builtin_lsx_vssrani_d_q: + case LoongArch::BI__builtin_lsx_vssrani_du_q: + case LoongArch::BI__builtin_lsx_vsrarni_d_q: + case LoongArch::BI__builtin_lsx_vssrlni_d_q: + case LoongArch::BI__builtin_lsx_vssrlni_du_q: + case LoongArch::BI__builtin_lsx_vssrlrni_d_q: + case LoongArch::BI__builtin_lsx_vssrlrni_du_q: + case LoongArch::BI__builtin_lsx_vsrani_d_q: + case LoongArch::BI__builtin_lsx_vsrlrni_d_q: + case LoongArch::BI__builtin_lsx_vsrlni_d_q: + return SemaBuiltinConstantArgRange(TheCall, 2, 0, 127); + case LoongArch::BI__builtin_lsx_vseqi_b: + case LoongArch::BI__builtin_lsx_vseqi_h: + case LoongArch::BI__builtin_lsx_vseqi_w: + case LoongArch::BI__builtin_lsx_vseqi_d: + case LoongArch::BI__builtin_lsx_vslti_b: + case LoongArch::BI__builtin_lsx_vslti_h: + case LoongArch::BI__builtin_lsx_vslti_w: + case LoongArch::BI__builtin_lsx_vslti_d: + case LoongArch::BI__builtin_lsx_vslei_b: + case LoongArch::BI__builtin_lsx_vslei_h: + case LoongArch::BI__builtin_lsx_vslei_w: + case LoongArch::BI__builtin_lsx_vslei_d: + case LoongArch::BI__builtin_lsx_vmaxi_b: + case LoongArch::BI__builtin_lsx_vmaxi_h: + case LoongArch::BI__builtin_lsx_vmaxi_w: + case LoongArch::BI__builtin_lsx_vmaxi_d: + case LoongArch::BI__builtin_lsx_vmini_b: + case LoongArch::BI__builtin_lsx_vmini_h: + case LoongArch::BI__builtin_lsx_vmini_w: + case LoongArch::BI__builtin_lsx_vmini_d: + return 
SemaBuiltinConstantArgRange(TheCall, 1, -16, 15); + case LoongArch::BI__builtin_lsx_vandi_b: + case LoongArch::BI__builtin_lsx_vnori_b: + case LoongArch::BI__builtin_lsx_vori_b: + case LoongArch::BI__builtin_lsx_vshuf4i_b: + case LoongArch::BI__builtin_lsx_vshuf4i_h: + case LoongArch::BI__builtin_lsx_vshuf4i_w: + case LoongArch::BI__builtin_lsx_vxori_b: + return SemaBuiltinConstantArgRange(TheCall, 1, 0, 255); + case LoongArch::BI__builtin_lsx_vbitseli_b: + case LoongArch::BI__builtin_lsx_vshuf4i_d: + case LoongArch::BI__builtin_lsx_vextrins_b: + case LoongArch::BI__builtin_lsx_vextrins_h: + case LoongArch::BI__builtin_lsx_vextrins_w: + case LoongArch::BI__builtin_lsx_vextrins_d: + case LoongArch::BI__builtin_lsx_vpermi_w: + return SemaBuiltinConstantArgRange(TheCall, 2, 0, 255); + case LoongArch::BI__builtin_lsx_vpickve2gr_b: + case LoongArch::BI__builtin_lsx_vpickve2gr_bu: + case LoongArch::BI__builtin_lsx_vreplvei_b: + return SemaBuiltinConstantArgRange(TheCall, 1, 0, 15); + case LoongArch::BI__builtin_lsx_vinsgr2vr_b: + return SemaBuiltinConstantArgRange(TheCall, 2, 0, 15); + case LoongArch::BI__builtin_lsx_vpickve2gr_h: + case LoongArch::BI__builtin_lsx_vpickve2gr_hu: + case LoongArch::BI__builtin_lsx_vreplvei_h: + return SemaBuiltinConstantArgRange(TheCall, 1, 0, 7); + case LoongArch::BI__builtin_lsx_vinsgr2vr_h: + return SemaBuiltinConstantArgRange(TheCall, 2, 0, 7); + case LoongArch::BI__builtin_lsx_vpickve2gr_w: + case LoongArch::BI__builtin_lsx_vpickve2gr_wu: + case LoongArch::BI__builtin_lsx_vreplvei_w: + return SemaBuiltinConstantArgRange(TheCall, 1, 0, 3); + case LoongArch::BI__builtin_lsx_vinsgr2vr_w: + return SemaBuiltinConstantArgRange(TheCall, 2, 0, 3); + case LoongArch::BI__builtin_lsx_vpickve2gr_d: + case LoongArch::BI__builtin_lsx_vpickve2gr_du: + case LoongArch::BI__builtin_lsx_vreplvei_d: + return SemaBuiltinConstantArgRange(TheCall, 1, 0, 1); + case LoongArch::BI__builtin_lsx_vinsgr2vr_d: + return SemaBuiltinConstantArgRange(TheCall, 2, 0, 1); + case LoongArch::BI__builtin_lsx_vstelm_b: + return SemaBuiltinConstantArgRange(TheCall, 2, -128, 127) || + SemaBuiltinConstantArgRange(TheCall, 3, 0, 15); + case LoongArch::BI__builtin_lsx_vstelm_h: + return SemaBuiltinConstantArgRange(TheCall, 2, -256, 254) || + SemaBuiltinConstantArgRange(TheCall, 3, 0, 7); + case LoongArch::BI__builtin_lsx_vstelm_w: + return SemaBuiltinConstantArgRange(TheCall, 2, -512, 508) || + SemaBuiltinConstantArgRange(TheCall, 3, 0, 3); + case LoongArch::BI__builtin_lsx_vstelm_d: + return SemaBuiltinConstantArgRange(TheCall, 2, -1024, 1016) || + SemaBuiltinConstantArgRange(TheCall, 3, 0, 1); + case LoongArch::BI__builtin_lsx_vldrepl_b: + case LoongArch::BI__builtin_lsx_vld: + return SemaBuiltinConstantArgRange(TheCall, 1, -2048, 2047); + case LoongArch::BI__builtin_lsx_vldrepl_h: + return SemaBuiltinConstantArgRange(TheCall, 1, -2048, 2046); + case LoongArch::BI__builtin_lsx_vldrepl_w: + return SemaBuiltinConstantArgRange(TheCall, 1, -2048, 2044); + case LoongArch::BI__builtin_lsx_vldrepl_d: + return SemaBuiltinConstantArgRange(TheCall, 1, -2048, 2040); + case LoongArch::BI__builtin_lsx_vst: + return SemaBuiltinConstantArgRange(TheCall, 2, -2048, 2047); + case LoongArch::BI__builtin_lsx_vldi: + return SemaBuiltinConstantArgRange(TheCall, 0, -4096, 4095); + case LoongArch::BI__builtin_lsx_vrepli_b: + case LoongArch::BI__builtin_lsx_vrepli_h: + case LoongArch::BI__builtin_lsx_vrepli_w: + case LoongArch::BI__builtin_lsx_vrepli_d: + return SemaBuiltinConstantArgRange(TheCall, 0, -512, 511); + + // LASX 
intrinsics. + case LoongArch::BI__builtin_lasx_xvbitclri_b: + case LoongArch::BI__builtin_lasx_xvbitrevi_b: + case LoongArch::BI__builtin_lasx_xvbitseti_b: + case LoongArch::BI__builtin_lasx_xvsat_b: + case LoongArch::BI__builtin_lasx_xvsat_bu: + case LoongArch::BI__builtin_lasx_xvslli_b: + case LoongArch::BI__builtin_lasx_xvsrai_b: + case LoongArch::BI__builtin_lasx_xvsrari_b: + case LoongArch::BI__builtin_lasx_xvsrli_b: + case LoongArch::BI__builtin_lasx_xvsllwil_h_b: + case LoongArch::BI__builtin_lasx_xvsllwil_hu_bu: + case LoongArch::BI__builtin_lasx_xvrotri_b: + case LoongArch::BI__builtin_lasx_xvsrlri_b: + return SemaBuiltinConstantArgRange(TheCall, 1, 0, 7); + case LoongArch::BI__builtin_lasx_xvbitclri_h: + case LoongArch::BI__builtin_lasx_xvbitrevi_h: + case LoongArch::BI__builtin_lasx_xvbitseti_h: + case LoongArch::BI__builtin_lasx_xvsat_h: + case LoongArch::BI__builtin_lasx_xvsat_hu: + case LoongArch::BI__builtin_lasx_xvslli_h: + case LoongArch::BI__builtin_lasx_xvsrai_h: + case LoongArch::BI__builtin_lasx_xvsrari_h: + case LoongArch::BI__builtin_lasx_xvsrli_h: + case LoongArch::BI__builtin_lasx_xvsllwil_w_h: + case LoongArch::BI__builtin_lasx_xvsllwil_wu_hu: + case LoongArch::BI__builtin_lasx_xvrotri_h: + case LoongArch::BI__builtin_lasx_xvsrlri_h: + return SemaBuiltinConstantArgRange(TheCall, 1, 0, 15); + case LoongArch::BI__builtin_lasx_xvssrarni_b_h: + case LoongArch::BI__builtin_lasx_xvssrarni_bu_h: + case LoongArch::BI__builtin_lasx_xvssrani_b_h: + case LoongArch::BI__builtin_lasx_xvssrani_bu_h: + case LoongArch::BI__builtin_lasx_xvsrarni_b_h: + case LoongArch::BI__builtin_lasx_xvsrlni_b_h: + case LoongArch::BI__builtin_lasx_xvsrlrni_b_h: + case LoongArch::BI__builtin_lasx_xvssrlni_b_h: + case LoongArch::BI__builtin_lasx_xvssrlni_bu_h: + case LoongArch::BI__builtin_lasx_xvssrlrni_b_h: + case LoongArch::BI__builtin_lasx_xvssrlrni_bu_h: + case LoongArch::BI__builtin_lasx_xvsrani_b_h: + return SemaBuiltinConstantArgRange(TheCall, 2, 0, 15); + case LoongArch::BI__builtin_lasx_xvslei_bu: + case LoongArch::BI__builtin_lasx_xvslei_hu: + case LoongArch::BI__builtin_lasx_xvslei_wu: + case LoongArch::BI__builtin_lasx_xvslei_du: + case LoongArch::BI__builtin_lasx_xvslti_bu: + case LoongArch::BI__builtin_lasx_xvslti_hu: + case LoongArch::BI__builtin_lasx_xvslti_wu: + case LoongArch::BI__builtin_lasx_xvslti_du: + case LoongArch::BI__builtin_lasx_xvmaxi_bu: + case LoongArch::BI__builtin_lasx_xvmaxi_hu: + case LoongArch::BI__builtin_lasx_xvmaxi_wu: + case LoongArch::BI__builtin_lasx_xvmaxi_du: + case LoongArch::BI__builtin_lasx_xvmini_bu: + case LoongArch::BI__builtin_lasx_xvmini_hu: + case LoongArch::BI__builtin_lasx_xvmini_wu: + case LoongArch::BI__builtin_lasx_xvmini_du: + case LoongArch::BI__builtin_lasx_xvaddi_bu: + case LoongArch::BI__builtin_lasx_xvaddi_hu: + case LoongArch::BI__builtin_lasx_xvaddi_wu: + case LoongArch::BI__builtin_lasx_xvaddi_du: + case LoongArch::BI__builtin_lasx_xvbitclri_w: + case LoongArch::BI__builtin_lasx_xvbitrevi_w: + case LoongArch::BI__builtin_lasx_xvbitseti_w: + case LoongArch::BI__builtin_lasx_xvsat_w: + case LoongArch::BI__builtin_lasx_xvsat_wu: + case LoongArch::BI__builtin_lasx_xvslli_w: + case LoongArch::BI__builtin_lasx_xvsrai_w: + case LoongArch::BI__builtin_lasx_xvsrari_w: + case LoongArch::BI__builtin_lasx_xvsrli_w: + case LoongArch::BI__builtin_lasx_xvsllwil_d_w: + case LoongArch::BI__builtin_lasx_xvsllwil_du_wu: + case LoongArch::BI__builtin_lasx_xvsrlri_w: + case LoongArch::BI__builtin_lasx_xvrotri_w: + case 
LoongArch::BI__builtin_lasx_xvsubi_bu: + case LoongArch::BI__builtin_lasx_xvsubi_hu: + case LoongArch::BI__builtin_lasx_xvsubi_wu: + case LoongArch::BI__builtin_lasx_xvsubi_du: + case LoongArch::BI__builtin_lasx_xvbsrl_v: + case LoongArch::BI__builtin_lasx_xvbsll_v: + return SemaBuiltinConstantArgRange(TheCall, 1, 0, 31); + case LoongArch::BI__builtin_lasx_xvssrarni_h_w: + case LoongArch::BI__builtin_lasx_xvssrarni_hu_w: + case LoongArch::BI__builtin_lasx_xvssrani_h_w: + case LoongArch::BI__builtin_lasx_xvssrani_hu_w: + case LoongArch::BI__builtin_lasx_xvsrarni_h_w: + case LoongArch::BI__builtin_lasx_xvsrani_h_w: + case LoongArch::BI__builtin_lasx_xvfrstpi_b: + case LoongArch::BI__builtin_lasx_xvfrstpi_h: + case LoongArch::BI__builtin_lasx_xvsrlni_h_w: + case LoongArch::BI__builtin_lasx_xvsrlrni_h_w: + case LoongArch::BI__builtin_lasx_xvssrlni_h_w: + case LoongArch::BI__builtin_lasx_xvssrlni_hu_w: + case LoongArch::BI__builtin_lasx_xvssrlrni_h_w: + case LoongArch::BI__builtin_lasx_xvssrlrni_hu_w: + return SemaBuiltinConstantArgRange(TheCall, 2, 0, 31); + case LoongArch::BI__builtin_lasx_xvbitclri_d: + case LoongArch::BI__builtin_lasx_xvbitrevi_d: + case LoongArch::BI__builtin_lasx_xvbitseti_d: + case LoongArch::BI__builtin_lasx_xvsat_d: + case LoongArch::BI__builtin_lasx_xvsat_du: + case LoongArch::BI__builtin_lasx_xvslli_d: + case LoongArch::BI__builtin_lasx_xvsrai_d: + case LoongArch::BI__builtin_lasx_xvsrli_d: + case LoongArch::BI__builtin_lasx_xvsrari_d: + case LoongArch::BI__builtin_lasx_xvrotri_d: + case LoongArch::BI__builtin_lasx_xvsrlri_d: + return SemaBuiltinConstantArgRange(TheCall, 1, 0, 63); + case LoongArch::BI__builtin_lasx_xvssrarni_w_d: + case LoongArch::BI__builtin_lasx_xvssrarni_wu_d: + case LoongArch::BI__builtin_lasx_xvssrani_w_d: + case LoongArch::BI__builtin_lasx_xvssrani_wu_d: + case LoongArch::BI__builtin_lasx_xvsrarni_w_d: + case LoongArch::BI__builtin_lasx_xvsrlni_w_d: + case LoongArch::BI__builtin_lasx_xvsrlrni_w_d: + case LoongArch::BI__builtin_lasx_xvssrlni_w_d: + case LoongArch::BI__builtin_lasx_xvssrlni_wu_d: + case LoongArch::BI__builtin_lasx_xvssrlrni_w_d: + case LoongArch::BI__builtin_lasx_xvssrlrni_wu_d: + case LoongArch::BI__builtin_lasx_xvsrani_w_d: + return SemaBuiltinConstantArgRange(TheCall, 2, 0, 63); + case LoongArch::BI__builtin_lasx_xvssrarni_d_q: + case LoongArch::BI__builtin_lasx_xvssrarni_du_q: + case LoongArch::BI__builtin_lasx_xvssrani_d_q: + case LoongArch::BI__builtin_lasx_xvssrani_du_q: + case LoongArch::BI__builtin_lasx_xvsrarni_d_q: + case LoongArch::BI__builtin_lasx_xvssrlni_d_q: + case LoongArch::BI__builtin_lasx_xvssrlni_du_q: + case LoongArch::BI__builtin_lasx_xvssrlrni_d_q: + case LoongArch::BI__builtin_lasx_xvssrlrni_du_q: + case LoongArch::BI__builtin_lasx_xvsrani_d_q: + case LoongArch::BI__builtin_lasx_xvsrlni_d_q: + case LoongArch::BI__builtin_lasx_xvsrlrni_d_q: + return SemaBuiltinConstantArgRange(TheCall, 2, 0, 127); + case LoongArch::BI__builtin_lasx_xvseqi_b: + case LoongArch::BI__builtin_lasx_xvseqi_h: + case LoongArch::BI__builtin_lasx_xvseqi_w: + case LoongArch::BI__builtin_lasx_xvseqi_d: + case LoongArch::BI__builtin_lasx_xvslti_b: + case LoongArch::BI__builtin_lasx_xvslti_h: + case LoongArch::BI__builtin_lasx_xvslti_w: + case LoongArch::BI__builtin_lasx_xvslti_d: + case LoongArch::BI__builtin_lasx_xvslei_b: + case LoongArch::BI__builtin_lasx_xvslei_h: + case LoongArch::BI__builtin_lasx_xvslei_w: + case LoongArch::BI__builtin_lasx_xvslei_d: + case LoongArch::BI__builtin_lasx_xvmaxi_b: + case 
LoongArch::BI__builtin_lasx_xvmaxi_h: + case LoongArch::BI__builtin_lasx_xvmaxi_w: + case LoongArch::BI__builtin_lasx_xvmaxi_d: + case LoongArch::BI__builtin_lasx_xvmini_b: + case LoongArch::BI__builtin_lasx_xvmini_h: + case LoongArch::BI__builtin_lasx_xvmini_w: + case LoongArch::BI__builtin_lasx_xvmini_d: + return SemaBuiltinConstantArgRange(TheCall, 1, -16, 15); + case LoongArch::BI__builtin_lasx_xvandi_b: + case LoongArch::BI__builtin_lasx_xvnori_b: + case LoongArch::BI__builtin_lasx_xvori_b: + case LoongArch::BI__builtin_lasx_xvshuf4i_b: + case LoongArch::BI__builtin_lasx_xvshuf4i_h: + case LoongArch::BI__builtin_lasx_xvshuf4i_w: + case LoongArch::BI__builtin_lasx_xvxori_b: + case LoongArch::BI__builtin_lasx_xvpermi_d: + return SemaBuiltinConstantArgRange(TheCall, 1, 0, 255); + case LoongArch::BI__builtin_lasx_xvbitseli_b: + case LoongArch::BI__builtin_lasx_xvshuf4i_d: + case LoongArch::BI__builtin_lasx_xvextrins_b: + case LoongArch::BI__builtin_lasx_xvextrins_h: + case LoongArch::BI__builtin_lasx_xvextrins_w: + case LoongArch::BI__builtin_lasx_xvextrins_d: + case LoongArch::BI__builtin_lasx_xvpermi_q: + case LoongArch::BI__builtin_lasx_xvpermi_w: + return SemaBuiltinConstantArgRange(TheCall, 2, 0, 255); + case LoongArch::BI__builtin_lasx_xvrepl128vei_b: + return SemaBuiltinConstantArgRange(TheCall, 1, 0, 15); + case LoongArch::BI__builtin_lasx_xvrepl128vei_h: + case LoongArch::BI__builtin_lasx_xvpickve2gr_w: + case LoongArch::BI__builtin_lasx_xvpickve2gr_wu: + case LoongArch::BI__builtin_lasx_xvpickve_w_f: + case LoongArch::BI__builtin_lasx_xvpickve_w: + return SemaBuiltinConstantArgRange(TheCall, 1, 0, 7); + case LoongArch::BI__builtin_lasx_xvinsgr2vr_w: + case LoongArch::BI__builtin_lasx_xvinsve0_w: + return SemaBuiltinConstantArgRange(TheCall, 2, 0, 7); + case LoongArch::BI__builtin_lasx_xvrepl128vei_w: + case LoongArch::BI__builtin_lasx_xvpickve2gr_d: + case LoongArch::BI__builtin_lasx_xvpickve2gr_du: + case LoongArch::BI__builtin_lasx_xvpickve_d_f: + case LoongArch::BI__builtin_lasx_xvpickve_d: + return SemaBuiltinConstantArgRange(TheCall, 1, 0, 3); + case LoongArch::BI__builtin_lasx_xvinsve0_d: + case LoongArch::BI__builtin_lasx_xvinsgr2vr_d: + return SemaBuiltinConstantArgRange(TheCall, 2, 0, 3); + case LoongArch::BI__builtin_lasx_xvstelm_b: + return SemaBuiltinConstantArgRange(TheCall, 2, -128, 127) || + SemaBuiltinConstantArgRange(TheCall, 3, 0, 31); + case LoongArch::BI__builtin_lasx_xvstelm_h: + return SemaBuiltinConstantArgRange(TheCall, 2, -256, 254) || + SemaBuiltinConstantArgRange(TheCall, 3, 0, 15); + case LoongArch::BI__builtin_lasx_xvstelm_w: + return SemaBuiltinConstantArgRange(TheCall, 2, -512, 508) || + SemaBuiltinConstantArgRange(TheCall, 3, 0, 7); + case LoongArch::BI__builtin_lasx_xvstelm_d: + return SemaBuiltinConstantArgRange(TheCall, 2, -1024, 1016) || + SemaBuiltinConstantArgRange(TheCall, 3, 0, 3); + case LoongArch::BI__builtin_lasx_xvrepl128vei_d: + return SemaBuiltinConstantArgRange(TheCall, 1, 0, 1); + case LoongArch::BI__builtin_lasx_xvldrepl_b: + case LoongArch::BI__builtin_lasx_xvld: + return SemaBuiltinConstantArgRange(TheCall, 1, -2048, 2047); + case LoongArch::BI__builtin_lasx_xvldrepl_h: + return SemaBuiltinConstantArgRange(TheCall, 1, -2048, 2046); + case LoongArch::BI__builtin_lasx_xvldrepl_w: + return SemaBuiltinConstantArgRange(TheCall, 1, -2048, 2044); + case LoongArch::BI__builtin_lasx_xvldrepl_d: + return SemaBuiltinConstantArgRange(TheCall, 1, -2048, 2040); + case LoongArch::BI__builtin_lasx_xvst: + return 
SemaBuiltinConstantArgRange(TheCall, 2, -2048, 2047); + case LoongArch::BI__builtin_lasx_xvldi: + return SemaBuiltinConstantArgRange(TheCall, 0, -4096, 4095); + case LoongArch::BI__builtin_lasx_xvrepli_b: + case LoongArch::BI__builtin_lasx_xvrepli_h: + case LoongArch::BI__builtin_lasx_xvrepli_w: + case LoongArch::BI__builtin_lasx_xvrepli_d: + return SemaBuiltinConstantArgRange(TheCall, 0, -512, 511); + } return false; } diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index 8a105b988c466bae092ee520252d0a7b35bb93bb..76cf2a8584fe1cf13685817dbd3d2931cfc8de40 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -3334,6 +3334,22 @@ static void handleSectionAttr(Sema &S, Decl *D, const ParsedAttr &AL) { } } +static void handleCodeModelAttr(Sema &S, Decl *D, const ParsedAttr &AL) { + StringRef Str; + SourceLocation LiteralLoc; + // Check that it is a string. + if (!S.checkStringLiteralArgumentAttr(AL, 0, Str, &LiteralLoc)) + return; + + llvm::CodeModel::Model CM; + if (!CodeModelAttr::ConvertStrToModel(Str, CM)) { + S.Diag(LiteralLoc, diag::err_attr_codemodel_arg) << Str; + return; + } + + D->addAttr(::new (S.Context) CodeModelAttr(S.Context, AL, CM)); +} + // This is used for `__declspec(code_seg("segname"))` on a decl. // `#pragma code_seg("segname")` uses checkSectionName() instead. static bool checkCodeSegName(Sema &S, SourceLocation LiteralLoc, @@ -8813,6 +8829,9 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL, case ParsedAttr::AT_Section: handleSectionAttr(S, D, AL); break; + case ParsedAttr::AT_CodeModel: + handleCodeModelAttr(S, D, AL); + break; case ParsedAttr::AT_RandomizeLayout: handleRandomizeLayoutAttr(S, D, AL); break; diff --git a/clang/test/CodeGen/LoongArch/abi-lp64d-empty-structs.c b/clang/test/CodeGen/LoongArch/abi-lp64d-empty-structs.c index fb90bf556c19b27ebb2b4f362c96b1b5c01bfe48..2f7596f0ebdc8beeeff104ecbbcab6b8d7db1fe6 100644 --- a/clang/test/CodeGen/LoongArch/abi-lp64d-empty-structs.c +++ b/clang/test/CodeGen/LoongArch/abi-lp64d-empty-structs.c @@ -3,7 +3,7 @@ // RUN: %clang_cc1 -triple loongarch64 -target-feature +f -target-feature +d -target-abi lp64d -emit-llvm %s -o - -x c++ | \ // RUN: FileCheck --check-prefix=CHECK-CXX %s -// Fields containing empty structs or unions are ignored when flattening +// Fields containing empty structs are ignored when flattening // structs to examine whether the structs can be passed via FARs, even in C++. // But there is an exception that non-zero-length array of empty structures are // not ignored in C++. These rules are not documented in psABI @@ -81,9 +81,62 @@ struct s8 test_s8(struct s8 a) { return a; } +/// Note: The tests below check how empty structs themselves are passed, while +/// the tests above check that empty structs used as fields of a container +/// struct are ignored when flattening the container to examine whether it can +/// be passed via FARs.
+ // CHECK-C: define{{.*}} void @test_s9() // CHECK-CXX: define{{.*}} i64 @_Z7test_s92s9(i64 {{.*}}) struct s9 { struct empty e; }; struct s9 test_s9(struct s9 a) { return a; } + +// CHECK-C: define{{.*}} void @test_s10() +// CHECK-CXX: define{{.*}} i64 @_Z8test_s103s10(i64 {{.*}}) +struct s10 { }; +struct s10 test_s10(struct s10 a) { + return a; +} + +// CHECK-C: define{{.*}} void @test_s11() +// CHECK-CXX: define{{.*}} i64 @_Z8test_s113s11(i64 {{.*}}) +struct s11 { struct { } s; }; +struct s11 test_s11(struct s11 a) { + return a; +} + +// CHECK-C: define{{.*}} void @test_s12() +// CHECK-CXX: define{{.*}} void @_Z8test_s123s12() +struct s12 { int i[0]; }; +struct s12 test_s12(struct s12 a) { + return a; +} + +// CHECK-C: define{{.*}} void @test_s13() +// CHECK-CXX: define{{.*}} void @_Z8test_s133s13() +struct s13 { struct { } s[0]; }; +struct s13 test_s13(struct s13 a) { + return a; +} + +// CHECK-C: define{{.*}} void @test_s14() +// CHECK-CXX: define{{.*}} i64 @_Z8test_s143s14(i64 {{.*}}) +struct s14 { struct { } s[1]; }; +struct s14 test_s14(struct s14 a) { + return a; +} + +// CHECK-C: define{{.*}} void @test_s15() +// CHECK-CXX: define{{.*}} i64 @_Z8test_s153s15(i64 {{.*}}) +struct s15 { int : 0; }; +struct s15 test_s15(struct s15 a) { + return a; +} + +// CHECK-C: define{{.*}} i64 @test_s16(i64 {{.*}}) +// CHECK-CXX: define{{.*}} i64 @_Z8test_s163s16(i64 {{.*}}) +struct s16 { int : 1; }; +struct s16 test_s16(struct s16 a) { + return a; +} diff --git a/clang/test/CodeGen/LoongArch/abi-lp64d-empty-unions.c b/clang/test/CodeGen/LoongArch/abi-lp64d-empty-unions.c new file mode 100644 index 0000000000000000000000000000000000000000..363e37efb64691ef182c9671bf741039dc7071a8 --- /dev/null +++ b/clang/test/CodeGen/LoongArch/abi-lp64d-empty-unions.c @@ -0,0 +1,25 @@ +// RUN: %clang_cc1 -triple loongarch64 -target-feature +f -target-feature +d -target-abi lp64d -emit-llvm %s -o - | \ +// RUN: FileCheck --check-prefix=CHECK-C %s +// RUN: %clang_cc1 -triple loongarch64 -target-feature +f -target-feature +d -target-abi lp64d -emit-llvm %s -o - -x c++ | \ +// RUN: FileCheck --check-prefix=CHECK-CXX %s + +#include + +// CHECK-C: define{{.*}} void @test1() +// CHECK-CXX: define{{.*}} i64 @_Z5test12u1(i64{{[^,]*}}) +union u1 { }; +union u1 test1(union u1 a) { + return a; +} + +struct s1 { + union u1 u; + int i; + float f; +}; + +// CHECK-C: define{{.*}} { i32, float } @test2(i32{{[^,]*}}, float{{[^,]*}}) +// CHECK-CXX: define{{.*}} [2 x i64] @_Z5test22s1([2 x i64]{{[^,]*}}) +struct s1 test2(struct s1 a) { + return a; +} diff --git a/clang/test/CodeGen/LoongArch/attributes.cpp b/clang/test/CodeGen/LoongArch/attributes.cpp new file mode 100644 index 0000000000000000000000000000000000000000..fb700ad305012be848c1354d8ca3765c11dc3c6f --- /dev/null +++ b/clang/test/CodeGen/LoongArch/attributes.cpp @@ -0,0 +1,34 @@ +// RUN: %clang_cc1 -emit-llvm -triple loongarch64 %s -o - | FileCheck %s + +// CHECK: @_ZL2v1 ={{.*}} global i32 0, code_model "small" +static int v1 __attribute__((model("normal"))); + +void use1() { + v1 = 1; +} + +// CHECK: @v2 ={{.*}} global i32 0, code_model "medium" +int v2 __attribute__((model("medium"))); + +// CHECK: @v3 ={{.*}} global float 0.000000e+00, code_model "large" +float v3 __attribute__((model("extreme"))); + +// CHECK: @_ZL2v4IiE ={{.*}} global i32 0, code_model "medium" +template <class T> +static T v4 __attribute__((model("medium"))); + +void use2() { + v4<int> = 1; +} + +struct S { + double d; +}; + +// CHECK: @v5 ={{.*}} global {{.*}}, code_model "medium" +S v5
__attribute__((model("medium"))); + +typedef void (*F)(); + +// CHECK: @v6 ={{.*}} global ptr null, code_model "large" +F v6 __attribute__((model("extreme"))); diff --git a/clang/test/CodeGen/LoongArch/intrinsic-la32-error.c b/clang/test/CodeGen/LoongArch/intrinsic-la32-error.c index 7551b762e12973768026860954995cd8ddf257af..83eb501d81f907064b88c2580fe5638a9202dd9f 100644 --- a/clang/test/CodeGen/LoongArch/intrinsic-la32-error.c +++ b/clang/test/CodeGen/LoongArch/intrinsic-la32-error.c @@ -1,10 +1,59 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // RUN: %clang_cc1 -triple loongarch32 -emit-llvm -S -verify %s -o /dev/null +// RUN: not %clang_cc1 -triple loongarch32 -DFEATURE_CHECK -emit-llvm %s 2>&1 \ +// RUN: | FileCheck %s #include <larchintrin.h> +#ifdef FEATURE_CHECK +void test_feature(long *v_l, unsigned long *v_ul, int *v_i, unsigned ui, char c, short s) { +// CHECK: error: '__builtin_loongarch_cacop_d' needs target feature 64bit + __builtin_loongarch_cacop_d(1, v_ul[0], 1024); + +// CHECK: error: '__builtin_loongarch_crc_w_b_w' needs target feature 64bit + v_i[0] = __builtin_loongarch_crc_w_b_w(c, v_i[0]); +// CHECK: error: '__builtin_loongarch_crc_w_h_w' needs target feature 64bit + v_i[1] = __builtin_loongarch_crc_w_h_w(c, v_i[0]); +// CHECK: error: '__builtin_loongarch_crc_w_w_w' needs target feature 64bit + v_i[2] = __builtin_loongarch_crc_w_w_w(c, v_i[0]); +// CHECK: error: '__builtin_loongarch_crc_w_d_w' needs target feature 64bit + v_i[3] = __builtin_loongarch_crc_w_d_w(c, v_i[0]); + +// CHECK: error: '__builtin_loongarch_crcc_w_b_w' needs target feature 64bit + v_i[4] = __builtin_loongarch_crcc_w_b_w(c, v_i[0]); +// CHECK: error: '__builtin_loongarch_crcc_w_h_w' needs target feature 64bit + v_i[5] = __builtin_loongarch_crcc_w_h_w(s, v_i[0]); +// CHECK: error: '__builtin_loongarch_crcc_w_w_w' needs target feature 64bit + v_i[6] = __builtin_loongarch_crcc_w_w_w(v_i[0], v_i[1]); +// CHECK: error: '__builtin_loongarch_crcc_w_d_w' needs target feature 64bit + v_i[7] = __builtin_loongarch_crcc_w_d_w(v_l[0], v_i[0]); + +// CHECK: error: '__builtin_loongarch_csrrd_d' needs target feature 64bit + v_ul[0] = __builtin_loongarch_csrrd_d(1); +// CHECK: error: '__builtin_loongarch_csrwr_d' needs target feature 64bit + v_ul[1] = __builtin_loongarch_csrwr_d(v_ul[0], 1); +// CHECK: error: '__builtin_loongarch_csrxchg_d' needs target feature 64bit + v_ul[2] = __builtin_loongarch_csrxchg_d(v_ul[0], v_ul[1], 1); + + +// CHECK: error: '__builtin_loongarch_iocsrrd_d' needs target feature 64bit + v_ul[3] = __builtin_loongarch_iocsrrd_d(ui); +// CHECK: error: '__builtin_loongarch_iocsrwr_d' needs target feature 64bit + __builtin_loongarch_iocsrwr_d(v_ul[0], ui); + +// CHECK: error: '__builtin_loongarch_asrtle_d' needs target feature 64bit + __builtin_loongarch_asrtle_d(v_l[0], v_l[1]); +// CHECK: error: '__builtin_loongarch_asrtgt_d' needs target feature 64bit + __builtin_loongarch_asrtgt_d(v_l[0], v_l[1]); + +// CHECK: error: '__builtin_loongarch_lddir_d' needs target feature 64bit + v_ul[4] = __builtin_loongarch_lddir_d(v_l[0], 1); +// CHECK: error: '__builtin_loongarch_ldpte_d' needs target feature 64bit + __builtin_loongarch_ldpte_d(v_l[0], 1); +} +#endif + void cacop_d(unsigned long int a) { - __builtin_loongarch_cacop_d(1, a, 1024); // expected-error {{this builtin requires target: loongarch64}} __builtin_loongarch_cacop_w(-1, a, 1024); // expected-error {{argument value -1 is outside the valid range [0, 31]}} __builtin_loongarch_cacop_w(32, a, 1024); // expected-error
{{argument value 32 is outside the valid range [0, 31]}} __builtin_loongarch_cacop_w(1, a, -4096); // expected-error {{argument value -4096 is outside the valid range [-2048, 2047]}} @@ -48,49 +97,6 @@ void syscall(int a) { __builtin_loongarch_syscall(a); // expected-error {{argument to '__builtin_loongarch_syscall' must be a constant integer}} } -int crc_w_b_w(char a, int b) { - return __builtin_loongarch_crc_w_b_w(a, b); // expected-error {{this builtin requires target: loongarch64}} -} - -int crc_w_h_w(short a, int b) { - return __builtin_loongarch_crc_w_h_w(a, b); // expected-error {{this builtin requires target: loongarch64}} -} - -int crc_w_w_w(int a, int b) { - return __builtin_loongarch_crc_w_w_w(a, b); // expected-error {{this builtin requires target: loongarch64}} -} - -int crc_w_d_w(long int a, int b) { - return __builtin_loongarch_crc_w_d_w(a, b); // expected-error {{this builtin requires target: loongarch64}} -} -int crcc_w_b_w(char a, int b) { - return __builtin_loongarch_crcc_w_b_w(a, b); // expected-error {{this builtin requires target: loongarch64}} -} - -int crcc_w_h_w(short a, int b) { - return __builtin_loongarch_crcc_w_h_w(a, b); // expected-error {{this builtin requires target: loongarch64}} -} - -int crcc_w_w_w(int a, int b) { - return __builtin_loongarch_crcc_w_w_w(a, b); // expected-error {{this builtin requires target: loongarch64}} -} - -int crcc_w_d_w(long int a, int b) { - return __builtin_loongarch_crcc_w_d_w(a, b); // expected-error {{this builtin requires target: loongarch64}} -} - -unsigned long int csrrd_d() { - return __builtin_loongarch_csrrd_d(1); // expected-error {{this builtin requires target: loongarch64}} -} - -unsigned long int csrwr_d(unsigned long int a) { - return __builtin_loongarch_csrwr_d(a, 1); // expected-error {{this builtin requires target: loongarch64}} -} - -unsigned long int csrxchg_d(unsigned long int a, unsigned long int b) { - return __builtin_loongarch_csrxchg_d(a, b, 1); // expected-error {{this builtin requires target: loongarch64}} -} - void csrrd_w(int a) { __builtin_loongarch_csrrd_w(16384); // expected-error {{argument value 16384 is outside the valid range [0, 16383]}} __builtin_loongarch_csrrd_w(-1); // expected-error {{argument value 4294967295 is outside the valid range [0, 16383]}} @@ -109,30 +115,6 @@ void csrxchg_w(unsigned int a, unsigned int b) { __builtin_loongarch_csrxchg_w(a, b, b); // expected-error {{argument to '__builtin_loongarch_csrxchg_w' must be a constant integer}} } -unsigned long int iocsrrd_d(unsigned int a) { - return __builtin_loongarch_iocsrrd_d(a); // expected-error {{this builtin requires target: loongarch64}} -} - -void iocsrwr_d(unsigned long int a, unsigned int b) { - __builtin_loongarch_iocsrwr_d(a, b); // expected-error {{this builtin requires target: loongarch64}} -} - -void asrtle_d(long int a, long int b) { - __builtin_loongarch_asrtle_d(a, b); // expected-error {{this builtin requires target: loongarch64}} -} - -void asrtgt_d(long int a, long int b) { - __builtin_loongarch_asrtgt_d(a, b); // expected-error {{this builtin requires target: loongarch64}} -} - -void lddir_d(long int a, int b) { - __builtin_loongarch_lddir_d(a, 1); // expected-error {{this builtin requires target: loongarch64}} -} - -void ldpte_d(long int a, int b) { - __builtin_loongarch_ldpte_d(a, 1); // expected-error {{this builtin requires target: loongarch64}} -} - void rdtime_d() { __rdtime_d(); // expected-warning {{call to undeclared function '__rdtime_d'}} } diff --git a/clang/test/CodeGen/LoongArch/intrinsic-la32.c 
b/clang/test/CodeGen/LoongArch/intrinsic-la32.c index 93d54f511a9cd271695ec066c483ff87f0a03c09..eb3f8cbe7ac4cc252e715d09cc7e9847173f5035 100644 --- a/clang/test/CodeGen/LoongArch/intrinsic-la32.c +++ b/clang/test/CodeGen/LoongArch/intrinsic-la32.c @@ -169,8 +169,8 @@ unsigned int cpucfg(unsigned int a) { // LA32-LABEL: @rdtime( // LA32-NEXT: entry: -// LA32-NEXT: [[TMP0:%.*]] = tail call { i32, i32 } asm sideeffect "rdtimeh.w $0, $1\0A\09", "=&r,=&r"() #[[ATTR1:[0-9]+]], !srcloc !2 -// LA32-NEXT: [[TMP1:%.*]] = tail call { i32, i32 } asm sideeffect "rdtimel.w $0, $1\0A\09", "=&r,=&r"() #[[ATTR1]], !srcloc !3 +// LA32-NEXT: [[TMP0:%.*]] = tail call { i32, i32 } asm sideeffect "rdtimeh.w $0, $1\0A\09", "=&r,=&r"() #[[ATTR1:[0-9]+]], !srcloc [[META2:![0-9]+]] +// LA32-NEXT: [[TMP1:%.*]] = tail call { i32, i32 } asm sideeffect "rdtimel.w $0, $1\0A\09", "=&r,=&r"() #[[ATTR1]], !srcloc [[META3:![0-9]+]] // LA32-NEXT: ret void // void rdtime() { @@ -201,13 +201,28 @@ void loongarch_movgr2fcsr(int a) { __builtin_loongarch_movgr2fcsr(1, a); } -// CHECK-LABEL: @cacop_w( -// CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.loongarch.cacop.w(i32 1, i32 [[A:%.*]], i32 1024) -// CHECK-NEXT: tail call void @llvm.loongarch.cacop.w(i32 1, i32 [[A]], i32 1024) -// CHECK-NEXT: ret void +// LA32-LABEL: @cacop_w( +// LA32-NEXT: entry: +// LA32-NEXT: tail call void @llvm.loongarch.cacop.w(i32 1, i32 [[A:%.*]], i32 1024) +// LA32-NEXT: tail call void @llvm.loongarch.cacop.w(i32 1, i32 [[A]], i32 1024) +// LA32-NEXT: ret void // void cacop_w(unsigned long int a) { __cacop_w(1, a, 1024); __builtin_loongarch_cacop_w(1, a, 1024); } + +// LA32-LABEL: @iocsrrd_h_result( +// LA32-NEXT: entry: +// LA32-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.iocsrrd.h(i32 [[A:%.*]]) +// LA32-NEXT: [[CONV_I:%.*]] = trunc i32 [[TMP0]] to i16 +// LA32-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.iocsrrd.h(i32 [[A]]) +// LA32-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 +// LA32-NEXT: [[CONV3:%.*]] = add i16 [[TMP2]], [[CONV_I]] +// LA32-NEXT: ret i16 [[CONV3]] +// +unsigned short iocsrrd_h_result(unsigned int a) { + unsigned short b = __iocsrrd_h(a); + unsigned short c = __builtin_loongarch_iocsrrd_h(a); + return b+c; +} diff --git a/clang/test/CodeGen/LoongArch/intrinsic-la64.c b/clang/test/CodeGen/LoongArch/intrinsic-la64.c index a740882eef5411cbb1940ce1538cbea12a672b2e..50ec358f546ec01ff1c08a3f9695ec701fca9ace 100644 --- a/clang/test/CodeGen/LoongArch/intrinsic-la64.c +++ b/clang/test/CodeGen/LoongArch/intrinsic-la64.c @@ -387,7 +387,7 @@ unsigned int cpucfg(unsigned int a) { // CHECK-LABEL: @rdtime_d( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call { i64, i64 } asm sideeffect "rdtime.d $0, $1\0A\09", "=&r,=&r"() #[[ATTR1:[0-9]+]], !srcloc !2 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { i64, i64 } asm sideeffect "rdtime.d $0, $1\0A\09", "=&r,=&r"() #[[ATTR1:[0-9]+]], !srcloc [[META2:![0-9]+]] // CHECK-NEXT: ret void // void rdtime_d() { @@ -396,8 +396,8 @@ void rdtime_d() { // CHECK-LABEL: @rdtime( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call { i32, i32 } asm sideeffect "rdtimeh.w $0, $1\0A\09", "=&r,=&r"() #[[ATTR1]], !srcloc !3 -// CHECK-NEXT: [[TMP1:%.*]] = tail call { i32, i32 } asm sideeffect "rdtimel.w $0, $1\0A\09", "=&r,=&r"() #[[ATTR1]], !srcloc !4 +// CHECK-NEXT: [[TMP0:%.*]] = tail call { i32, i32 } asm sideeffect "rdtimeh.w $0, $1\0A\09", "=&r,=&r"() #[[ATTR1]], !srcloc [[META3:![0-9]+]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { i32, i32 } asm sideeffect "rdtimel.w 
$0, $1\0A\09", "=&r,=&r"() #[[ATTR1]], !srcloc [[META4:![0-9]+]] // CHECK-NEXT: ret void // void rdtime() { @@ -427,3 +427,18 @@ void loongarch_movgr2fcsr(int a) { __movgr2fcsr(1, a); __builtin_loongarch_movgr2fcsr(1, a); } + +// CHECK-LABEL: @iocsrrd_h_result( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.loongarch.iocsrrd.h(i32 [[A:%.*]]) +// CHECK-NEXT: [[CONV_I:%.*]] = trunc i32 [[TMP0]] to i16 +// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.iocsrrd.h(i32 [[A]]) +// CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 +// CHECK-NEXT: [[CONV3:%.*]] = add i16 [[TMP2]], [[CONV_I]] +// CHECK-NEXT: ret i16 [[CONV3]] +// +unsigned short iocsrrd_h_result(unsigned int a) { + unsigned short b = __iocsrrd_h(a); + unsigned short c = __builtin_loongarch_iocsrrd_h(a); + return b+c; +} diff --git a/clang/test/CodeGen/LoongArch/lasx/builtin-alias-error.c b/clang/test/CodeGen/LoongArch/lasx/builtin-alias-error.c new file mode 100644 index 0000000000000000000000000000000000000000..2a3862bbe3c18cc27d4395c96cae08749d1b0ec3 --- /dev/null +++ b/clang/test/CodeGen/LoongArch/lasx/builtin-alias-error.c @@ -0,0 +1,1373 @@ +// RUN: %clang_cc1 -triple loongarch64 -target-feature +lasx -verify %s + +#include <lasxintrin.h> + +v32i8 xvslli_b(v32i8 _1, int var) { + v32i8 res = __lasx_xvslli_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __lasx_xvslli_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __lasx_xvslli_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvslli_b' must be a constant integer}} + return res; +} + +v16i16 xvslli_h(v16i16 _1, int var) { + v16i16 res = __lasx_xvslli_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lasx_xvslli_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lasx_xvslli_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvslli_h' must be a constant integer}} + return res; +} + +v8i32 xvslli_w(v8i32 _1, int var) { + v8i32 res = __lasx_xvslli_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvslli_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvslli_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvslli_w' must be a constant integer}} + return res; +} + +v4i64 xvslli_d(v4i64 _1, int var) { + v4i64 res = __lasx_xvslli_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __lasx_xvslli_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __lasx_xvslli_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvslli_d' must be a constant integer}} + return res; +} + +v32i8 xvsrai_b(v32i8 _1, int var) { + v32i8 res = __lasx_xvsrai_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __lasx_xvsrai_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __lasx_xvsrai_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrai_b' must be a constant integer}} + return res; +} + +v16i16 xvsrai_h(v16i16 _1, int var) { + v16i16 res = __lasx_xvsrai_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lasx_xvsrai_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0,
15]}} + res |= __lasx_xvsrai_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrai_h' must be a constant integer}} + return res; +} + +v8i32 xvsrai_w(v8i32 _1, int var) { + v8i32 res = __lasx_xvsrai_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvsrai_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvsrai_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrai_w' must be a constant integer}} + return res; +} + +v4i64 xvsrai_d(v4i64 _1, int var) { + v4i64 res = __lasx_xvsrai_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __lasx_xvsrai_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __lasx_xvsrai_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrai_d' must be a constant integer}} + return res; +} + +v32i8 xvsrari_b(v32i8 _1, int var) { + v32i8 res = __lasx_xvsrari_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __lasx_xvsrari_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __lasx_xvsrari_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrari_b' must be a constant integer}} + return res; +} + +v16i16 xvsrari_h(v16i16 _1, int var) { + v16i16 res = __lasx_xvsrari_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lasx_xvsrari_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lasx_xvsrari_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrari_h' must be a constant integer}} + return res; +} + +v8i32 xvsrari_w(v8i32 _1, int var) { + v8i32 res = __lasx_xvsrari_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvsrari_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvsrari_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrari_w' must be a constant integer}} + return res; +} + +v4i64 xvsrari_d(v4i64 _1, int var) { + v4i64 res = __lasx_xvsrari_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __lasx_xvsrari_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __lasx_xvsrari_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrari_d' must be a constant integer}} + return res; +} + +v32i8 xvsrli_b(v32i8 _1, int var) { + v32i8 res = __lasx_xvsrli_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __lasx_xvsrli_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __lasx_xvsrli_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrli_b' must be a constant integer}} + return res; +} + +v16i16 xvsrli_h(v16i16 _1, int var) { + v16i16 res = __lasx_xvsrli_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lasx_xvsrli_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lasx_xvsrli_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrli_h' must be a constant integer}} + return res; +} + +v8i32 xvsrli_w(v8i32 _1, int var) { + v8i32 res = __lasx_xvsrli_w(_1, -1); // expected-error {{argument value 
4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvsrli_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvsrli_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrli_w' must be a constant integer}} + return res; +} + +v4i64 xvsrli_d(v4i64 _1, int var) { + v4i64 res = __lasx_xvsrli_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __lasx_xvsrli_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __lasx_xvsrli_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrli_d' must be a constant integer}} + return res; +} + +v32i8 xvsrlri_b(v32i8 _1, int var) { + v32i8 res = __lasx_xvsrlri_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __lasx_xvsrlri_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __lasx_xvsrlri_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrlri_b' must be a constant integer}} + return res; +} + +v16i16 xvsrlri_h(v16i16 _1, int var) { + v16i16 res = __lasx_xvsrlri_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lasx_xvsrlri_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lasx_xvsrlri_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrlri_h' must be a constant integer}} + return res; +} + +v8i32 xvsrlri_w(v8i32 _1, int var) { + v8i32 res = __lasx_xvsrlri_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvsrlri_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvsrlri_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrlri_w' must be a constant integer}} + return res; +} + +v4i64 xvsrlri_d(v4i64 _1, int var) { + v4i64 res = __lasx_xvsrlri_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __lasx_xvsrlri_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __lasx_xvsrlri_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrlri_d' must be a constant integer}} + return res; +} + +v32u8 xvbitclri_b(v32u8 _1, int var) { + v32u8 res = __lasx_xvbitclri_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __lasx_xvbitclri_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __lasx_xvbitclri_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitclri_b' must be a constant integer}} + return res; +} + +v16u16 xvbitclri_h(v16u16 _1, int var) { + v16u16 res = __lasx_xvbitclri_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lasx_xvbitclri_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lasx_xvbitclri_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitclri_h' must be a constant integer}} + return res; +} + +v8u32 xvbitclri_w(v8u32 _1, int var) { + v8u32 res = __lasx_xvbitclri_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvbitclri_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvbitclri_w(_1, var); // expected-error {{argument 
to '__builtin_lasx_xvbitclri_w' must be a constant integer}} + return res; +} + +v4u64 xvbitclri_d(v4u64 _1, int var) { + v4u64 res = __lasx_xvbitclri_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __lasx_xvbitclri_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __lasx_xvbitclri_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitclri_d' must be a constant integer}} + return res; +} + +v32u8 xvbitseti_b(v32u8 _1, int var) { + v32u8 res = __lasx_xvbitseti_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __lasx_xvbitseti_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __lasx_xvbitseti_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitseti_b' must be a constant integer}} + return res; +} + +v16u16 xvbitseti_h(v16u16 _1, int var) { + v16u16 res = __lasx_xvbitseti_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lasx_xvbitseti_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lasx_xvbitseti_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitseti_h' must be a constant integer}} + return res; +} + +v8u32 xvbitseti_w(v8u32 _1, int var) { + v8u32 res = __lasx_xvbitseti_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvbitseti_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvbitseti_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitseti_w' must be a constant integer}} + return res; +} + +v4u64 xvbitseti_d(v4u64 _1, int var) { + v4u64 res = __lasx_xvbitseti_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __lasx_xvbitseti_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __lasx_xvbitseti_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitseti_d' must be a constant integer}} + return res; +} + +v32u8 xvbitrevi_b(v32u8 _1, int var) { + v32u8 res = __lasx_xvbitrevi_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __lasx_xvbitrevi_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __lasx_xvbitrevi_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitrevi_b' must be a constant integer}} + return res; +} + +v16u16 xvbitrevi_h(v16u16 _1, int var) { + v16u16 res = __lasx_xvbitrevi_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lasx_xvbitrevi_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lasx_xvbitrevi_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitrevi_h' must be a constant integer}} + return res; +} + +v8u32 xvbitrevi_w(v8u32 _1, int var) { + v8u32 res = __lasx_xvbitrevi_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvbitrevi_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvbitrevi_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitrevi_w' must be a constant integer}} + return res; +} + +v4u64 xvbitrevi_d(v4u64 _1, int var) { + v4u64 res = __lasx_xvbitrevi_d(_1, -1); // 
expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __lasx_xvbitrevi_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __lasx_xvbitrevi_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitrevi_d' must be a constant integer}} + return res; +} + +v32i8 xvaddi_bu(v32i8 _1, int var) { + v32i8 res = __lasx_xvaddi_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvaddi_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvaddi_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvaddi_bu' must be a constant integer}} + return res; +} + +v16i16 xvaddi_hu(v16i16 _1, int var) { + v16i16 res = __lasx_xvaddi_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvaddi_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvaddi_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvaddi_hu' must be a constant integer}} + return res; +} + +v8i32 xvaddi_wu(v8i32 _1, int var) { + v8i32 res = __lasx_xvaddi_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvaddi_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvaddi_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvaddi_wu' must be a constant integer}} + return res; +} + +v4i64 xvaddi_du(v4i64 _1, int var) { + v4i64 res = __lasx_xvaddi_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvaddi_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvaddi_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvaddi_du' must be a constant integer}} + return res; +} + +v32i8 xvsubi_bu(v32i8 _1, int var) { + v32i8 res = __lasx_xvsubi_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvsubi_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvsubi_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsubi_bu' must be a constant integer}} + return res; +} + +v16i16 xvsubi_hu(v16i16 _1, int var) { + v16i16 res = __lasx_xvsubi_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvsubi_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvsubi_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsubi_hu' must be a constant integer}} + return res; +} + +v8i32 xvsubi_wu(v8i32 _1, int var) { + v8i32 res = __lasx_xvsubi_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvsubi_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvsubi_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsubi_wu' must be a constant integer}} + return res; +} + +v4i64 xvsubi_du(v4i64 _1, int var) { + v4i64 res = __lasx_xvsubi_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvsubi_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvsubi_du(_1, var); // 
expected-error {{argument to '__builtin_lasx_xvsubi_du' must be a constant integer}} + return res; +} + +v32i8 xvmaxi_b(v32i8 _1, int var) { + v32i8 res = __lasx_xvmaxi_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __lasx_xvmaxi_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lasx_xvmaxi_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_b' must be a constant integer}} + return res; +} + +v16i16 xvmaxi_h(v16i16 _1, int var) { + v16i16 res = __lasx_xvmaxi_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __lasx_xvmaxi_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lasx_xvmaxi_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_h' must be a constant integer}} + return res; +} + +v8i32 xvmaxi_w(v8i32 _1, int var) { + v8i32 res = __lasx_xvmaxi_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __lasx_xvmaxi_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lasx_xvmaxi_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_w' must be a constant integer}} + return res; +} + +v4i64 xvmaxi_d(v4i64 _1, int var) { + v4i64 res = __lasx_xvmaxi_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __lasx_xvmaxi_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lasx_xvmaxi_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_d' must be a constant integer}} + return res; +} + +v32u8 xvmaxi_bu(v32u8 _1, int var) { + v32u8 res = __lasx_xvmaxi_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvmaxi_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvmaxi_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_bu' must be a constant integer}} + return res; +} + +v16u16 xvmaxi_hu(v16u16 _1, int var) { + v16u16 res = __lasx_xvmaxi_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvmaxi_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvmaxi_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_hu' must be a constant integer}} + return res; +} + +v8u32 xvmaxi_wu(v8u32 _1, int var) { + v8u32 res = __lasx_xvmaxi_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvmaxi_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvmaxi_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_wu' must be a constant integer}} + return res; +} + +v4u64 xvmaxi_du(v4u64 _1, int var) { + v4u64 res = __lasx_xvmaxi_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvmaxi_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvmaxi_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_du' must be a constant integer}} + return res; +} + +v32i8 xvmini_b(v32i8 _1, int var) { + v32i8 res = __lasx_xvmini_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res 
|= __lasx_xvmini_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lasx_xvmini_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_b' must be a constant integer}} + return res; +} + +v16i16 xvmini_h(v16i16 _1, int var) { + v16i16 res = __lasx_xvmini_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __lasx_xvmini_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lasx_xvmini_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_h' must be a constant integer}} + return res; +} + +v8i32 xvmini_w(v8i32 _1, int var) { + v8i32 res = __lasx_xvmini_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __lasx_xvmini_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lasx_xvmini_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_w' must be a constant integer}} + return res; +} + +v4i64 xvmini_d(v4i64 _1, int var) { + v4i64 res = __lasx_xvmini_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __lasx_xvmini_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lasx_xvmini_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_d' must be a constant integer}} + return res; +} + +v32u8 xvmini_bu(v32u8 _1, int var) { + v32u8 res = __lasx_xvmini_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvmini_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvmini_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_bu' must be a constant integer}} + return res; +} + +v16u16 xvmini_hu(v16u16 _1, int var) { + v16u16 res = __lasx_xvmini_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvmini_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvmini_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_hu' must be a constant integer}} + return res; +} + +v8u32 xvmini_wu(v8u32 _1, int var) { + v8u32 res = __lasx_xvmini_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvmini_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvmini_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_wu' must be a constant integer}} + return res; +} + +v4u64 xvmini_du(v4u64 _1, int var) { + v4u64 res = __lasx_xvmini_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvmini_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvmini_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_du' must be a constant integer}} + return res; +} + +v32i8 xvseqi_b(v32i8 _1, int var) { + v32i8 res = __lasx_xvseqi_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __lasx_xvseqi_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lasx_xvseqi_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvseqi_b' must be a constant integer}} + return res; +} + +v16i16
xvseqi_h(v16i16 _1, int var) { + v16i16 res = __lasx_xvseqi_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __lasx_xvseqi_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lasx_xvseqi_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvseqi_h' must be a constant integer}} + return res; +} + +v8i32 xvseqi_w(v8i32 _1, int var) { + v8i32 res = __lasx_xvseqi_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __lasx_xvseqi_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lasx_xvseqi_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvseqi_w' must be a constant integer}} + return res; +} + +v4i64 xvseqi_d(v4i64 _1, int var) { + v4i64 res = __lasx_xvseqi_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __lasx_xvseqi_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lasx_xvseqi_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvseqi_d' must be a constant integer}} + return res; +} + +v32i8 xvslti_b(v32i8 _1, int var) { + v32i8 res = __lasx_xvslti_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __lasx_xvslti_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lasx_xvslti_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_b' must be a constant integer}} + return res; +} + +v16i16 xvslti_h(v16i16 _1, int var) { + v16i16 res = __lasx_xvslti_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __lasx_xvslti_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lasx_xvslti_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_h' must be a constant integer}} + return res; +} + +v8i32 xvslti_w(v8i32 _1, int var) { + v8i32 res = __lasx_xvslti_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __lasx_xvslti_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lasx_xvslti_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_w' must be a constant integer}} + return res; +} + +v4i64 xvslti_d(v4i64 _1, int var) { + v4i64 res = __lasx_xvslti_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __lasx_xvslti_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lasx_xvslti_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_d' must be a constant integer}} + return res; +} + +v32i8 xvslti_bu(v32u8 _1, int var) { + v32i8 res = __lasx_xvslti_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvslti_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvslti_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_bu' must be a constant integer}} + return res; +} + +v16i16 xvslti_hu(v16u16 _1, int var) { + v16i16 res = __lasx_xvslti_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvslti_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= 
__lasx_xvslti_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_hu' must be a constant integer}} + return res; +} + +v8i32 xvslti_wu(v8u32 _1, int var) { + v8i32 res = __lasx_xvslti_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvslti_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvslti_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_wu' must be a constant integer}} + return res; +} + +v4i64 xvslti_du(v4u64 _1, int var) { + v4i64 res = __lasx_xvslti_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvslti_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvslti_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_du' must be a constant integer}} + return res; +} + +v32i8 xvslei_b(v32i8 _1, int var) { + v32i8 res = __lasx_xvslei_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __lasx_xvslei_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lasx_xvslei_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_b' must be a constant integer}} + return res; +} + +v16i16 xvslei_h(v16i16 _1, int var) { + v16i16 res = __lasx_xvslei_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __lasx_xvslei_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lasx_xvslei_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_h' must be a constant integer}} + return res; +} + +v8i32 xvslei_w(v8i32 _1, int var) { + v8i32 res = __lasx_xvslei_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __lasx_xvslei_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lasx_xvslei_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_w' must be a constant integer}} + return res; +} + +v4i64 xvslei_d(v4i64 _1, int var) { + v4i64 res = __lasx_xvslei_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __lasx_xvslei_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lasx_xvslei_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_d' must be a constant integer}} + return res; +} + +v32i8 xvslei_bu(v32u8 _1, int var) { + v32i8 res = __lasx_xvslei_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvslei_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvslei_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_bu' must be a constant integer}} + return res; +} + +v16i16 xvslei_hu(v16u16 _1, int var) { + v16i16 res = __lasx_xvslei_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvslei_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvslei_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_hu' must be a constant integer}} + return res; +} + +v8i32 xvslei_wu(v8u32 _1, int var) { + v8i32 res = __lasx_xvslei_wu(_1, -1); // expected-error {{argument value 4294967295 is 
outside the valid range [0, 31]}} + res |= __lasx_xvslei_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvslei_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_wu' must be a constant integer}} + return res; +} + +v4i64 xvslei_du(v4u64 _1, int var) { + v4i64 res = __lasx_xvslei_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvslei_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvslei_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_du' must be a constant integer}} + return res; +} + +v32i8 xvsat_b(v32i8 _1, int var) { + v32i8 res = __lasx_xvsat_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __lasx_xvsat_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __lasx_xvsat_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_b' must be a constant integer}} + return res; +} + +v16i16 xvsat_h(v16i16 _1, int var) { + v16i16 res = __lasx_xvsat_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lasx_xvsat_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lasx_xvsat_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_h' must be a constant integer}} + return res; +} + +v8i32 xvsat_w(v8i32 _1, int var) { + v8i32 res = __lasx_xvsat_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvsat_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvsat_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_w' must be a constant integer}} + return res; +} + +v4i64 xvsat_d(v4i64 _1, int var) { + v4i64 res = __lasx_xvsat_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __lasx_xvsat_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __lasx_xvsat_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_d' must be a constant integer}} + return res; +} + +v32u8 xvsat_bu(v32u8 _1, int var) { + v32u8 res = __lasx_xvsat_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __lasx_xvsat_bu(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __lasx_xvsat_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_bu' must be a constant integer}} + return res; +} + +v16u16 xvsat_hu(v16u16 _1, int var) { + v16u16 res = __lasx_xvsat_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lasx_xvsat_hu(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lasx_xvsat_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_hu' must be a constant integer}} + return res; +} + +v8u32 xvsat_wu(v8u32 _1, int var) { + v8u32 res = __lasx_xvsat_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvsat_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvsat_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_wu' must be a constant integer}} + return res; +} + +v4u64 
xvsat_du(v4u64 _1, int var) { + v4u64 res = __lasx_xvsat_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __lasx_xvsat_du(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __lasx_xvsat_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_du' must be a constant integer}} + return res; +} + +v32i8 xvrepl128vei_b(v32i8 _1, int var) { + v32i8 res = __lasx_xvrepl128vei_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lasx_xvrepl128vei_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lasx_xvrepl128vei_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvrepl128vei_b' must be a constant integer}} + return res; +} + +v16i16 xvrepl128vei_h(v16i16 _1, int var) { + v16i16 res = __lasx_xvrepl128vei_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __lasx_xvrepl128vei_h(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __lasx_xvrepl128vei_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvrepl128vei_h' must be a constant integer}} + return res; +} + +v8i32 xvrepl128vei_w(v8i32 _1, int var) { + v8i32 res = __lasx_xvrepl128vei_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} + res |= __lasx_xvrepl128vei_w(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + res |= __lasx_xvrepl128vei_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvrepl128vei_w' must be a constant integer}} + return res; +} + +v4i64 xvrepl128vei_d(v4i64 _1, int var) { + v4i64 res = __lasx_xvrepl128vei_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 1]}} + res |= __lasx_xvrepl128vei_d(_1, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} + res |= __lasx_xvrepl128vei_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvrepl128vei_d' must be a constant integer}} + return res; +} + +v32u8 xvandi_b(v32u8 _1, int var) { + v32u8 res = __lasx_xvandi_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __lasx_xvandi_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __lasx_xvandi_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvandi_b' must be a constant integer}} + return res; +} + +v32u8 xvori_b(v32u8 _1, int var) { + v32u8 res = __lasx_xvori_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __lasx_xvori_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __lasx_xvori_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvori_b' must be a constant integer}} + return res; +} + +v32u8 xvnori_b(v32u8 _1, int var) { + v32u8 res = __lasx_xvnori_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __lasx_xvnori_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __lasx_xvnori_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvnori_b' must be a constant integer}} + return res; +} + +v32u8 xvxori_b(v32u8 _1, int var) { + v32u8 res = __lasx_xvxori_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= 
__lasx_xvxori_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __lasx_xvxori_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvxori_b' must be a constant integer}} + return res; +} + +v32u8 xvbitseli_b(v32u8 _1, v32u8 _2, int var) { + v32u8 res = __lasx_xvbitseli_b(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __lasx_xvbitseli_b(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __lasx_xvbitseli_b(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvbitseli_b' must be a constant integer}} + return res; +} + +v32i8 xvshuf4i_b(v32i8 _1, int var) { + v32i8 res = __lasx_xvshuf4i_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __lasx_xvshuf4i_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __lasx_xvshuf4i_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvshuf4i_b' must be a constant integer}} + return res; +} + +v16i16 xvshuf4i_h(v16i16 _1, int var) { + v16i16 res = __lasx_xvshuf4i_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __lasx_xvshuf4i_h(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __lasx_xvshuf4i_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvshuf4i_h' must be a constant integer}} + return res; +} + +v8i32 xvshuf4i_w(v8i32 _1, int var) { + v8i32 res = __lasx_xvshuf4i_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __lasx_xvshuf4i_w(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __lasx_xvshuf4i_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvshuf4i_w' must be a constant integer}} + return res; +} + +v4i64 xvshuf4i_d(v4i64 _1, v4i64 _2, int var) { + v4i64 res = __lasx_xvshuf4i_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __lasx_xvshuf4i_d(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __lasx_xvshuf4i_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvshuf4i_d' must be a constant integer}} + return res; +} + +v8i32 xvpermi_w(v8i32 _1, v8i32 _2, int var) { + v8i32 res = __lasx_xvpermi_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __lasx_xvpermi_w(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __lasx_xvpermi_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvpermi_w' must be a constant integer}} + return res; +} + +v4i64 xvpermi_d(v4i64 _1, int var) { + v4i64 res = __lasx_xvpermi_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __lasx_xvpermi_d(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __lasx_xvpermi_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvpermi_d' must be a constant integer}} + return res; +} + +v32i8 xvpermi_q(v32i8 _1, v32i8 _2, int var) { + v32i8 res = __lasx_xvpermi_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __lasx_xvpermi_q(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 
255]}} + res |= __lasx_xvpermi_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvpermi_q' must be a constant integer}} + return res; +} + +v16i16 xvsllwil_h_b(v32i8 _1, int var) { + v16i16 res = __lasx_xvsllwil_h_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __lasx_xvsllwil_h_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __lasx_xvsllwil_h_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvsllwil_h_b' must be a constant integer}} + return res; +} + +v8i32 xvsllwil_w_h(v16i16 _1, int var) { + v8i32 res = __lasx_xvsllwil_w_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lasx_xvsllwil_w_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lasx_xvsllwil_w_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvsllwil_w_h' must be a constant integer}} + return res; +} + +v4i64 xvsllwil_d_w(v8i32 _1, int var) { + v4i64 res = __lasx_xvsllwil_d_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvsllwil_d_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvsllwil_d_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvsllwil_d_w' must be a constant integer}} + return res; +} + +v16u16 xvsllwil_hu_bu(v32u8 _1, int var) { + v16u16 res = __lasx_xvsllwil_hu_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __lasx_xvsllwil_hu_bu(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __lasx_xvsllwil_hu_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsllwil_hu_bu' must be a constant integer}} + return res; +} + +v8u32 xvsllwil_wu_hu(v16u16 _1, int var) { + v8u32 res = __lasx_xvsllwil_wu_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lasx_xvsllwil_wu_hu(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lasx_xvsllwil_wu_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsllwil_wu_hu' must be a constant integer}} + return res; +} + +v4u64 xvsllwil_du_wu(v8u32 _1, int var) { + v4u64 res = __lasx_xvsllwil_du_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvsllwil_du_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvsllwil_du_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsllwil_du_wu' must be a constant integer}} + return res; +} + +v32i8 xvfrstpi_b(v32i8 _1, v32i8 _2, int var) { + v32i8 res = __lasx_xvfrstpi_b(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvfrstpi_b(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvfrstpi_b(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvfrstpi_b' must be a constant integer}} + return res; +} + +v16i16 xvfrstpi_h(v16i16 _1, v16i16 _2, int var) { + v16i16 res = __lasx_xvfrstpi_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvfrstpi_h(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvfrstpi_h(_1, _2, var); // expected-error 
{{argument to '__builtin_lasx_xvfrstpi_h' must be a constant integer}} + return res; +} + +v32i8 xvbsrl_v(v32i8 _1, int var) { + v32i8 res = __lasx_xvbsrl_v(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvbsrl_v(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvbsrl_v(_1, var); // expected-error {{argument to '__builtin_lasx_xvbsrl_v' must be a constant integer}} + return res; +} + +v32i8 xvbsll_v(v32i8 _1, int var) { + v32i8 res = __lasx_xvbsll_v(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvbsll_v(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvbsll_v(_1, var); // expected-error {{argument to '__builtin_lasx_xvbsll_v' must be a constant integer}} + return res; +} + +v32i8 xvextrins_b(v32i8 _1, v32i8 _2, int var) { + v32i8 res = __lasx_xvextrins_b(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __lasx_xvextrins_b(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __lasx_xvextrins_b(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvextrins_b' must be a constant integer}} + return res; +} + +v16i16 xvextrins_h(v16i16 _1, v16i16 _2, int var) { + v16i16 res = __lasx_xvextrins_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __lasx_xvextrins_h(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __lasx_xvextrins_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvextrins_h' must be a constant integer}} + return res; +} + +v8i32 xvextrins_w(v8i32 _1, v8i32 _2, int var) { + v8i32 res = __lasx_xvextrins_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __lasx_xvextrins_w(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __lasx_xvextrins_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvextrins_w' must be a constant integer}} + return res; +} + +v4i64 xvextrins_d(v4i64 _1, v4i64 _2, int var) { + v4i64 res = __lasx_xvextrins_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __lasx_xvextrins_d(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __lasx_xvextrins_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvextrins_d' must be a constant integer}} + return res; +} + +v32i8 xvld(void *_1, int var) { + v32i8 res = __lasx_xvld(_1, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} + res |= __lasx_xvld(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2047]}} + res |= __lasx_xvld(_1, var); // expected-error {{argument to '__builtin_lasx_xvld' must be a constant integer}} + return res; +} + +void xvst(v32i8 _1, void *_2, int var) { + __lasx_xvst(_1, _2, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} + __lasx_xvst(_1, _2, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2047]}} + __lasx_xvst(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvst' must be a constant integer}} +} + +void xvstelm_b(v32i8 _1, void * _2, int var) { + 
__lasx_xvstelm_b(_1, _2, -129, 1); // expected-error {{argument value -129 is outside the valid range [-128, 127]}} + __lasx_xvstelm_b(_1, _2, 128, 1); // expected-error {{argument value 128 is outside the valid range [-128, 127]}} + __lasx_xvstelm_b(_1, _2, var, 1); // expected-error {{argument to '__builtin_lasx_xvstelm_b' must be a constant integer}} +} + +void xvstelm_h(v16i16 _1, void * _2, int var) { + __lasx_xvstelm_h(_1, _2, -258, 1); // expected-error {{argument value -258 is outside the valid range [-256, 254]}} + __lasx_xvstelm_h(_1, _2, 256, 1); // expected-error {{argument value 256 is outside the valid range [-256, 254]}} + __lasx_xvstelm_h(_1, _2, var, 1); // expected-error {{argument to '__builtin_lasx_xvstelm_h' must be a constant integer}} +} + +void xvstelm_w(v8i32 _1, void * _2, int var) { + __lasx_xvstelm_w(_1, _2, -516, 1); // expected-error {{argument value -516 is outside the valid range [-512, 508]}} + __lasx_xvstelm_w(_1, _2, 512, 1); // expected-error {{argument value 512 is outside the valid range [-512, 508]}} + __lasx_xvstelm_w(_1, _2, var, 1); // expected-error {{argument to '__builtin_lasx_xvstelm_w' must be a constant integer}} +} + +void xvstelm_d(v4i64 _1, void * _2, int var) { + __lasx_xvstelm_d(_1, _2, -1032, 1); // expected-error {{argument value -1032 is outside the valid range [-1024, 1016]}} + __lasx_xvstelm_d(_1, _2, 1024, 1); // expected-error {{argument value 1024 is outside the valid range [-1024, 1016]}} + __lasx_xvstelm_d(_1, _2, var, 1); // expected-error {{argument to '__builtin_lasx_xvstelm_d' must be a constant integer}} +} + +void xvstelm_b_idx(v32i8 _1, void * _2, int var) { + __lasx_xvstelm_b(_1, _2, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + __lasx_xvstelm_b(_1, _2, 1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + __lasx_xvstelm_b(_1, _2, 1, var); // expected-error {{argument to '__builtin_lasx_xvstelm_b' must be a constant integer}} +} + +void xvstelm_h_idx(v16i16 _1, void * _2, int var) { + __lasx_xvstelm_h(_1, _2, 2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + __lasx_xvstelm_h(_1, _2, 2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + __lasx_xvstelm_h(_1, _2, 2, var); // expected-error {{argument to '__builtin_lasx_xvstelm_h' must be a constant integer}} +} + +void xvstelm_w_idx(v8i32 _1, void * _2, int var) { + __lasx_xvstelm_w(_1, _2, 4, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + __lasx_xvstelm_w(_1, _2, 4, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + __lasx_xvstelm_w(_1, _2, 4, var); // expected-error {{argument to '__builtin_lasx_xvstelm_w' must be a constant integer}} +} + +void xvstelm_d_idx(v4i64 _1, void * _2, int var) { + __lasx_xvstelm_d(_1, _2, 8, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} + __lasx_xvstelm_d(_1, _2, 8, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + __lasx_xvstelm_d(_1, _2, 8, var); // expected-error {{argument to '__builtin_lasx_xvstelm_d' must be a constant integer}} +} + +v8i32 xvinsve0_w(v8i32 _1, v8i32 _2, int var) { + v8i32 res = __lasx_xvinsve0_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __lasx_xvinsve0_w(_1, _2, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __lasx_xvinsve0_w(_1, _2, 
var); // expected-error {{argument to '__builtin_lasx_xvinsve0_w' must be a constant integer}} + return res; +} + +v4i64 xvinsve0_d(v4i64 _1, v4i64 _2, int var) { + v4i64 res = __lasx_xvinsve0_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} + res |= __lasx_xvinsve0_d(_1, _2, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + res |= __lasx_xvinsve0_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvinsve0_d' must be a constant integer}} + return res; +} + +v8i32 xvpickve_w(v8i32 _1, int var) { + v8i32 res = __lasx_xvpickve_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __lasx_xvpickve_w(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __lasx_xvpickve_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve_w' must be a constant integer}} + return res; +} + +v4i64 xvpickve_d(v4i64 _1, int var) { + v4i64 res = __lasx_xvpickve_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} + res |= __lasx_xvpickve_d(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + res |= __lasx_xvpickve_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve_d' must be a constant integer}} + return res; +} + +v4i64 xvldi(int var) { + v4i64 res = __lasx_xvldi(-4097); // expected-error {{argument value -4097 is outside the valid range [-4096, 4095]}} + res |= __lasx_xvldi(4096); // expected-error {{argument value 4096 is outside the valid range [-4096, 4095]}} + res |= __lasx_xvldi(var); // expected-error {{argument to '__builtin_lasx_xvldi' must be a constant integer}} + return res; +} + +v8i32 xvinsgr2vr_w(v8i32 _1, int var) { + v8i32 res = __lasx_xvinsgr2vr_w(_1, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __lasx_xvinsgr2vr_w(_1, 1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __lasx_xvinsgr2vr_w(_1, 1, var); // expected-error {{argument to '__builtin_lasx_xvinsgr2vr_w' must be a constant integer}} + return res; +} + +v4i64 xvinsgr2vr_d(v4i64 _1, int var) { + v4i64 res = __lasx_xvinsgr2vr_d(_1, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} + res |= __lasx_xvinsgr2vr_d(_1, 1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + res |= __lasx_xvinsgr2vr_d(_1, 1, var); // expected-error {{argument to '__builtin_lasx_xvinsgr2vr_d' must be a constant integer}} + return res; +} + +v32i8 xvldrepl_b(void *_1, int var) { + v32i8 res = __lasx_xvldrepl_b(_1, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} + res |= __lasx_xvldrepl_b(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2047]}} + res |= __lasx_xvldrepl_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvldrepl_b' must be a constant integer}} + return res; +} + +v16i16 xvldrepl_h(void *_1, int var) { + v16i16 res = __lasx_xvldrepl_h(_1, -2050); // expected-error {{argument value -2050 is outside the valid range [-2048, 2046]}} + res |= __lasx_xvldrepl_h(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2046]}} + res |= __lasx_xvldrepl_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvldrepl_h' must be a constant integer}} + return res; +} + +v8i32 xvldrepl_w(void *_1, int var) { + v8i32 res 
= __lasx_xvldrepl_w(_1, -2052); // expected-error {{argument value -2052 is outside the valid range [-2048, 2044]}} + res |= __lasx_xvldrepl_w(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2044]}} + res |= __lasx_xvldrepl_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvldrepl_w' must be a constant integer}} + return res; +} + +v4i64 xvldrepl_d(void *_1, int var) { + v4i64 res = __lasx_xvldrepl_d(_1, -2056); // expected-error {{argument value -2056 is outside the valid range [-2048, 2040]}} + res |= __lasx_xvldrepl_d(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2040]}} + res |= __lasx_xvldrepl_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvldrepl_d' must be a constant integer}} + return res; +} + +int xvpickve2gr_w(v8i32 _1, int var) { + int res = __lasx_xvpickve2gr_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __lasx_xvpickve2gr_w(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __lasx_xvpickve2gr_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve2gr_w' must be a constant integer}} + return res; +} + +unsigned int xvpickve2gr_wu(v8i32 _1, int var) { + unsigned int res = __lasx_xvpickve2gr_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __lasx_xvpickve2gr_wu(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __lasx_xvpickve2gr_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve2gr_wu' must be a constant integer}} + return res; +} + +long xvpickve2gr_d(v4i64 _1, int var) { + long res = __lasx_xvpickve2gr_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} + res |= __lasx_xvpickve2gr_d(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + res |= __lasx_xvpickve2gr_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve2gr_d' must be a constant integer}} + return res; +} + +unsigned long int xvpickve2gr_du(v4i64 _1, int var) { + unsigned long int res = __lasx_xvpickve2gr_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} + res |= __lasx_xvpickve2gr_du(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + res |= __lasx_xvpickve2gr_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve2gr_du' must be a constant integer}} + return res; +} + +v32i8 xvrotri_b(v32i8 _1, int var) { + v32i8 res = __lasx_xvrotri_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __lasx_xvrotri_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __lasx_xvrotri_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvrotri_b' must be a constant integer}} + return res; +} + +v16i16 xvrotri_h(v16i16 _1, int var) { + v16i16 res = __lasx_xvrotri_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lasx_xvrotri_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lasx_xvrotri_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvrotri_h' must be a constant integer}} + return res; +} + +v8i32 xvrotri_w(v8i32 _1, int var) { + v8i32 res = __lasx_xvrotri_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 
[0, 31]}} + res |= __lasx_xvrotri_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvrotri_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvrotri_w' must be a constant integer}} + return res; +} + +v4i64 xvrotri_d(v4i64 _1, int var) { + v4i64 res = __lasx_xvrotri_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __lasx_xvrotri_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __lasx_xvrotri_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvrotri_d' must be a constant integer}} + return res; +} + +v32i8 xvsrlni_b_h(v32i8 _1, v32i8 _2, int var) { + v32i8 res = __lasx_xvsrlni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lasx_xvsrlni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lasx_xvsrlni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlni_b_h' must be a constant integer}} + return res; +} + +v16i16 xvsrlni_h_w(v16i16 _1, v16i16 _2, int var) { + v16i16 res = __lasx_xvsrlni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvsrlni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvsrlni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlni_h_w' must be a constant integer}} + return res; +} + +v8i32 xvsrlni_w_d(v8i32 _1, v8i32 _2, int var) { + v8i32 res = __lasx_xvsrlni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __lasx_xvsrlni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __lasx_xvsrlni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlni_w_d' must be a constant integer}} + return res; +} + +v4i64 xvsrlni_d_q(v4i64 _1, v4i64 _2, int var) { + v4i64 res = __lasx_xvsrlni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} + res |= __lasx_xvsrlni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + res |= __lasx_xvsrlni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlni_d_q' must be a constant integer}} + return res; +} + +v32i8 xvsrlrni_b_h(v32i8 _1, v32i8 _2, int var) { + v32i8 res = __lasx_xvsrlrni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lasx_xvsrlrni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lasx_xvsrlrni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlrni_b_h' must be a constant integer}} + return res; +} + +v16i16 xvsrlrni_h_w(v16i16 _1, v16i16 _2, int var) { + v16i16 res = __lasx_xvsrlrni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvsrlrni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvsrlrni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlrni_h_w' must be a constant integer}} + return res; +} + +v8i32 xvsrlrni_w_d(v8i32 _1, v8i32 _2, int var) { + v8i32 res = __lasx_xvsrlrni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 
63]}} + res |= __lasx_xvsrlrni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __lasx_xvsrlrni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlrni_w_d' must be a constant integer}} + return res; +} + +v4i64 xvsrlrni_d_q(v4i64 _1, v4i64 _2, int var) { + v4i64 res = __lasx_xvsrlrni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} + res |= __lasx_xvsrlrni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + res |= __lasx_xvsrlrni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlrni_d_q' must be a constant integer}} + return res; +} + +v32i8 xvssrlni_b_h(v32i8 _1, v32i8 _2, int var) { + v32i8 res = __lasx_xvssrlni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lasx_xvssrlni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lasx_xvssrlni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_b_h' must be a constant integer}} + return res; +} + +v16i16 xvssrlni_h_w(v16i16 _1, v16i16 _2, int var) { + v16i16 res = __lasx_xvssrlni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvssrlni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvssrlni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_h_w' must be a constant integer}} + return res; +} + +v8i32 xvssrlni_w_d(v8i32 _1, v8i32 _2, int var) { + v8i32 res = __lasx_xvssrlni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __lasx_xvssrlni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __lasx_xvssrlni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_w_d' must be a constant integer}} + return res; +} + +v4i64 xvssrlni_d_q(v4i64 _1, v4i64 _2, int var) { + v4i64 res = __lasx_xvssrlni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} + res |= __lasx_xvssrlni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + res |= __lasx_xvssrlni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_d_q' must be a constant integer}} + return res; +} + +v32u8 xvssrlni_bu_h(v32u8 _1, v32i8 _2, int var) { + v32u8 res = __lasx_xvssrlni_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lasx_xvssrlni_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lasx_xvssrlni_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_bu_h' must be a constant integer}} + return res; +} + +v16u16 xvssrlni_hu_w(v16u16 _1, v16i16 _2, int var) { + v16u16 res = __lasx_xvssrlni_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvssrlni_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvssrlni_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_hu_w' must be a constant integer}} + return res; +} + +v8u32 xvssrlni_wu_d(v8u32 _1, v8i32 _2, int var) { + v8u32 res = __lasx_xvssrlni_wu_d(_1, _2, 
-1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __lasx_xvssrlni_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __lasx_xvssrlni_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_wu_d' must be a constant integer}} + return res; +} + +v4u64 xvssrlni_du_q(v4u64 _1, v4i64 _2, int var) { + v4u64 res = __lasx_xvssrlni_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} + res |= __lasx_xvssrlni_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + res |= __lasx_xvssrlni_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_du_q' must be a constant integer}} + return res; +} + +v32i8 xvssrlrni_b_h(v32i8 _1, v32i8 _2, int var) { + v32i8 res = __lasx_xvssrlrni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lasx_xvssrlrni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lasx_xvssrlrni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_b_h' must be a constant integer}} + return res; +} + +v16i16 xvssrlrni_h_w(v16i16 _1, v16i16 _2, int var) { + v16i16 res = __lasx_xvssrlrni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvssrlrni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvssrlrni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_h_w' must be a constant integer}} + return res; +} + +v8i32 xvssrlrni_w_d(v8i32 _1, v8i32 _2, int var) { + v8i32 res = __lasx_xvssrlrni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __lasx_xvssrlrni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __lasx_xvssrlrni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_w_d' must be a constant integer}} + return res; +} + +v4i64 xvssrlrni_d_q(v4i64 _1, v4i64 _2, int var) { + v4i64 res = __lasx_xvssrlrni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} + res |= __lasx_xvssrlrni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + res |= __lasx_xvssrlrni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_d_q' must be a constant integer}} + return res; +} + +v32u8 xvssrlrni_bu_h(v32u8 _1, v32i8 _2, int var) { + v32u8 res = __lasx_xvssrlrni_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lasx_xvssrlrni_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lasx_xvssrlrni_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_bu_h' must be a constant integer}} + return res; +} + +v16u16 xvssrlrni_hu_w(v16u16 _1, v16i16 _2, int var) { + v16u16 res = __lasx_xvssrlrni_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvssrlrni_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvssrlrni_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_hu_w' must be a constant 
integer}} + return res; +} + +v8u32 xvssrlrni_wu_d(v8u32 _1, v8i32 _2, int var) { + v8u32 res = __lasx_xvssrlrni_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __lasx_xvssrlrni_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __lasx_xvssrlrni_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_wu_d' must be a constant integer}} + return res; +} + +v4u64 xvssrlrni_du_q(v4u64 _1, v4i64 _2, int var) { + v4u64 res = __lasx_xvssrlrni_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} + res |= __lasx_xvssrlrni_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + res |= __lasx_xvssrlrni_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_du_q' must be a constant integer}} + return res; +} + +v32i8 xvsrani_b_h(v32i8 _1, v32i8 _2, int var) { + v32i8 res = __lasx_xvsrani_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lasx_xvsrani_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lasx_xvsrani_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrani_b_h' must be a constant integer}} + return res; +} + +v16i16 xvsrani_h_w(v16i16 _1, v16i16 _2, int var) { + v16i16 res = __lasx_xvsrani_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvsrani_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvsrani_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrani_h_w' must be a constant integer}} + return res; +} + +v8i32 xvsrani_w_d(v8i32 _1, v8i32 _2, int var) { + v8i32 res = __lasx_xvsrani_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __lasx_xvsrani_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __lasx_xvsrani_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrani_w_d' must be a constant integer}} + return res; +} + +v4i64 xvsrani_d_q(v4i64 _1, v4i64 _2, int var) { + v4i64 res = __lasx_xvsrani_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} + res |= __lasx_xvsrani_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + res |= __lasx_xvsrani_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrani_d_q' must be a constant integer}} + return res; +} + +v32i8 xvsrarni_b_h(v32i8 _1, v32i8 _2, int var) { + v32i8 res = __lasx_xvsrarni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lasx_xvsrarni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lasx_xvsrarni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrarni_b_h' must be a constant integer}} + return res; +} + +v16i16 xvsrarni_h_w(v16i16 _1, v16i16 _2, int var) { + v16i16 res = __lasx_xvsrarni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvsrarni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvsrarni_h_w(_1, _2, var); // 
expected-error {{argument to '__builtin_lasx_xvsrarni_h_w' must be a constant integer}} + return res; +} + +v8i32 xvsrarni_w_d(v8i32 _1, v8i32 _2, int var) { + v8i32 res = __lasx_xvsrarni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __lasx_xvsrarni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __lasx_xvsrarni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrarni_w_d' must be a constant integer}} + return res; +} + +v4i64 xvsrarni_d_q(v4i64 _1, v4i64 _2, int var) { + v4i64 res = __lasx_xvsrarni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} + res |= __lasx_xvsrarni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + res |= __lasx_xvsrarni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrarni_d_q' must be a constant integer}} + return res; +} + +v32i8 xvssrani_b_h(v32i8 _1, v32i8 _2, int var) { + v32i8 res = __lasx_xvssrani_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lasx_xvssrani_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lasx_xvssrani_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_b_h' must be a constant integer}} + return res; +} + +v16i16 xvssrani_h_w(v16i16 _1, v16i16 _2, int var) { + v16i16 res = __lasx_xvssrani_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvssrani_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvssrani_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_h_w' must be a constant integer}} + return res; +} + +v8i32 xvssrani_w_d(v8i32 _1, v8i32 _2, int var) { + v8i32 res = __lasx_xvssrani_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __lasx_xvssrani_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __lasx_xvssrani_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_w_d' must be a constant integer}} + return res; +} + +v4i64 xvssrani_d_q(v4i64 _1, v4i64 _2, int var) { + v4i64 res = __lasx_xvssrani_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} + res |= __lasx_xvssrani_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + res |= __lasx_xvssrani_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_d_q' must be a constant integer}} + return res; +} + +v32u8 xvssrani_bu_h(v32u8 _1, v32i8 _2, int var) { + v32u8 res = __lasx_xvssrani_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lasx_xvssrani_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lasx_xvssrani_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_bu_h' must be a constant integer}} + return res; +} + +v16u16 xvssrani_hu_w(v16u16 _1, v16i16 _2, int var) { + v16u16 res = __lasx_xvssrani_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvssrani_hu_w(_1, _2, 32); // expected-error {{argument value 32 is 
outside the valid range [0, 31]}} + res |= __lasx_xvssrani_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_hu_w' must be a constant integer}} + return res; +} + +v8u32 xvssrani_wu_d(v8u32 _1, v8i32 _2, int var) { + v8u32 res = __lasx_xvssrani_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __lasx_xvssrani_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __lasx_xvssrani_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_wu_d' must be a constant integer}} + return res; +} + +v4u64 xvssrani_du_q(v4u64 _1, v4i64 _2, int var) { + v4u64 res = __lasx_xvssrani_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} + res |= __lasx_xvssrani_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + res |= __lasx_xvssrani_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_du_q' must be a constant integer}} + return res; +} + +v32i8 xvssrarni_b_h(v32i8 _1, v32i8 _2, int var) { + v32i8 res = __lasx_xvssrarni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lasx_xvssrarni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lasx_xvssrarni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_b_h' must be a constant integer}} + return res; +} + +v16i16 xvssrarni_h_w(v16i16 _1, v16i16 _2, int var) { + v16i16 res = __lasx_xvssrarni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lasx_xvssrarni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvssrarni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_h_w' must be a constant integer}} + return res; +} + +v8i32 xvssrarni_w_d(v8i32 _1, v8i32 _2, int var) { + v8i32 res = __lasx_xvssrarni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __lasx_xvssrarni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __lasx_xvssrarni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_w_d' must be a constant integer}} + return res; +} + +v4i64 xvssrarni_d_q(v4i64 _1, v4i64 _2, int var) { + v4i64 res = __lasx_xvssrarni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} + res |= __lasx_xvssrarni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + res |= __lasx_xvssrarni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_d_q' must be a constant integer}} + return res; +} + +v32u8 xvssrarni_bu_h(v32u8 _1, v32i8 _2, int var) { + v32u8 res = __lasx_xvssrarni_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lasx_xvssrarni_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lasx_xvssrarni_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_bu_h' must be a constant integer}} + return res; +} + +v16u16 xvssrarni_hu_w(v16u16 _1, v16i16 _2, int var) { + v16u16 res = __lasx_xvssrarni_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is 
outside the valid range [0, 31]}} + res |= __lasx_xvssrarni_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lasx_xvssrarni_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_hu_w' must be a constant integer}} + return res; +} + +v8u32 xvssrarni_wu_d(v8u32 _1, v8i32 _2, int var) { + v8u32 res = __lasx_xvssrarni_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __lasx_xvssrarni_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __lasx_xvssrarni_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_wu_d' must be a constant integer}} + return res; +} + +v4u64 xvssrarni_du_q(v4u64 _1, v4i64 _2, int var) { + v4u64 res = __lasx_xvssrarni_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} + res |= __lasx_xvssrarni_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + res |= __lasx_xvssrarni_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_du_q' must be a constant integer}} + return res; +} + +v4f64 xvpickve_d_f(v4f64 _1, int var) { + v4f64 res = __lasx_xvpickve_d_f(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} + res += __lasx_xvpickve_d_f(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + res += __lasx_xvpickve_d_f(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve_d_f' must be a constant integer}} + return res; +} + +v8f32 xvpickve_w_f(v8f32 _1, int var) { + v8f32 res = __lasx_xvpickve_w_f(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res += __lasx_xvpickve_w_f(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res += __lasx_xvpickve_w_f(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve_w_f' must be a constant integer}} + return res; +} + +v32i8 xvrepli_b(int var) { + v32i8 res = __lasx_xvrepli_b(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} + res |= __lasx_xvrepli_b(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}} + res |= __lasx_xvrepli_b(var); // expected-error {{argument to '__builtin_lasx_xvrepli_b' must be a constant integer}} + return res; +} + +v4i64 xvrepli_d(int var) { + v4i64 res = __lasx_xvrepli_d(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} + res |= __lasx_xvrepli_d(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}} + res |= __lasx_xvrepli_d(var); // expected-error {{argument to '__builtin_lasx_xvrepli_d' must be a constant integer}} + return res; +} + +v16i16 xvrepli_h(int var) { + v16i16 res = __lasx_xvrepli_h(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} + res |= __lasx_xvrepli_h(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}} + res |= __lasx_xvrepli_h(var); // expected-error {{argument to '__builtin_lasx_xvrepli_h' must be a constant integer}} + return res; +} + +v8i32 xvrepli_w(int var) { + v8i32 res = __lasx_xvrepli_w(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} + res |= __lasx_xvrepli_w(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}} + res |= 
__lasx_xvrepli_w(var); // expected-error {{argument to '__builtin_lasx_xvrepli_w' must be a constant integer}} + return res; +} diff --git a/clang/test/CodeGen/LoongArch/lasx/builtin-alias.c b/clang/test/CodeGen/LoongArch/lasx/builtin-alias.c new file mode 100644 index 0000000000000000000000000000000000000000..9a8ce224bcfd0910914c54519f8c263d4f6fa379 --- /dev/null +++ b/clang/test/CodeGen/LoongArch/lasx/builtin-alias.c @@ -0,0 +1,6386 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -triple loongarch64 -target-feature +lasx -O2 -emit-llvm %s -o - | FileCheck %s + +#include <lasxintrin.h> + +// CHECK-LABEL: @xvsll_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2:![0-9]+]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsll.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvsll_b(v32i8 _1, v32i8 _2) { return __lasx_xvsll_b(_1, _2); } +// CHECK-LABEL: @xvsll_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsll.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvsll_h(v16i16 _1, v16i16 _2) { return __lasx_xvsll_h(_1, _2); } +// CHECK-LABEL: @xvsll_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsll.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvsll_w(v8i32 _1, v8i32 _2) { return __lasx_xvsll_w(_1, _2); } +// CHECK-LABEL: @xvsll_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsll.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvsll_d(v4i64 _1, v4i64 _2) { return __lasx_xvsll_d(_1, _2); } +// CHECK-LABEL: @xvslli_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslli.b(<32 x i8> [[_1]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvslli_b(v32i8 _1) { return __lasx_xvslli_b(_1, 1); } +// CHECK-LABEL: @xvslli_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslli.h(<16 x i16> [[_1]], i32 1) +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr
[[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvslli_h(v16i16 _1) { return __lasx_xvslli_h(_1, 1); } +// CHECK-LABEL: @xvslli_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslli.w(<8 x i32> [[_1]], i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvslli_w(v8i32 _1) { return __lasx_xvslli_w(_1, 1); } +// CHECK-LABEL: @xvslli_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslli.d(<4 x i64> [[_1]], i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvslli_d(v4i64 _1) { return __lasx_xvslli_d(_1, 1); } +// CHECK-LABEL: @xvsra_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsra.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvsra_b(v32i8 _1, v32i8 _2) { return __lasx_xvsra_b(_1, _2); } +// CHECK-LABEL: @xvsra_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsra.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvsra_h(v16i16 _1, v16i16 _2) { return __lasx_xvsra_h(_1, _2); } +// CHECK-LABEL: @xvsra_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsra.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvsra_w(v8i32 _1, v8i32 _2) { return __lasx_xvsra_w(_1, _2); } +// CHECK-LABEL: @xvsra_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsra.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvsra_d(v4i64 _1, v4i64 _2) { return __lasx_xvsra_d(_1, _2); } +// CHECK-LABEL: @xvsrai_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrai.b(<32 x i8> [[_1]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 
xvsrai_b(v32i8 _1) { return __lasx_xvsrai_b(_1, 1); } +// CHECK-LABEL: @xvsrai_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrai.h(<16 x i16> [[_1]], i32 1) +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvsrai_h(v16i16 _1) { return __lasx_xvsrai_h(_1, 1); } +// CHECK-LABEL: @xvsrai_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrai.w(<8 x i32> [[_1]], i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvsrai_w(v8i32 _1) { return __lasx_xvsrai_w(_1, 1); } +// CHECK-LABEL: @xvsrai_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrai.d(<4 x i64> [[_1]], i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvsrai_d(v4i64 _1) { return __lasx_xvsrai_d(_1, 1); } +// CHECK-LABEL: @xvsrar_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrar.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvsrar_b(v32i8 _1, v32i8 _2) { return __lasx_xvsrar_b(_1, _2); } +// CHECK-LABEL: @xvsrar_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrar.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvsrar_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrar_h(_1, _2); } +// CHECK-LABEL: @xvsrar_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrar.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvsrar_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrar_w(_1, _2); } +// CHECK-LABEL: @xvsrar_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrar.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvsrar_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrar_d(_1, _2); } +// CHECK-LABEL: 
@xvsrari_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrari.b(<32 x i8> [[_1]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvsrari_b(v32i8 _1) { return __lasx_xvsrari_b(_1, 1); } +// CHECK-LABEL: @xvsrari_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrari.h(<16 x i16> [[_1]], i32 1) +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvsrari_h(v16i16 _1) { return __lasx_xvsrari_h(_1, 1); } +// CHECK-LABEL: @xvsrari_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrari.w(<8 x i32> [[_1]], i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvsrari_w(v8i32 _1) { return __lasx_xvsrari_w(_1, 1); } +// CHECK-LABEL: @xvsrari_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrari.d(<4 x i64> [[_1]], i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvsrari_d(v4i64 _1) { return __lasx_xvsrari_d(_1, 1); } +// CHECK-LABEL: @xvsrl_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrl.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvsrl_b(v32i8 _1, v32i8 _2) { return __lasx_xvsrl_b(_1, _2); } +// CHECK-LABEL: @xvsrl_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrl.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvsrl_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrl_h(_1, _2); } +// CHECK-LABEL: @xvsrl_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrl.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvsrl_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrl_w(_1, _2); } +// CHECK-LABEL: @xvsrl_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr 
[[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrl.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvsrl_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrl_d(_1, _2); } +// CHECK-LABEL: @xvsrli_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrli.b(<32 x i8> [[_1]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvsrli_b(v32i8 _1) { return __lasx_xvsrli_b(_1, 1); } +// CHECK-LABEL: @xvsrli_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrli.h(<16 x i16> [[_1]], i32 1) +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvsrli_h(v16i16 _1) { return __lasx_xvsrli_h(_1, 1); } +// CHECK-LABEL: @xvsrli_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrli.w(<8 x i32> [[_1]], i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvsrli_w(v8i32 _1) { return __lasx_xvsrli_w(_1, 1); } +// CHECK-LABEL: @xvsrli_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrli.d(<4 x i64> [[_1]], i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvsrli_d(v4i64 _1) { return __lasx_xvsrli_d(_1, 1); } +// CHECK-LABEL: @xvsrlr_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlr.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvsrlr_b(v32i8 _1, v32i8 _2) { return __lasx_xvsrlr_b(_1, _2); } +// CHECK-LABEL: @xvsrlr_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlr.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvsrlr_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrlr_h(_1, _2); } +// CHECK-LABEL: @xvsrlr_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlr.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +// CHECK-NEXT: store <8 x i32> 
[[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvsrlr_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrlr_w(_1, _2); } +// CHECK-LABEL: @xvsrlr_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlr.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvsrlr_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrlr_d(_1, _2); } +// CHECK-LABEL: @xvsrlri_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlri.b(<32 x i8> [[_1]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvsrlri_b(v32i8 _1) { return __lasx_xvsrlri_b(_1, 1); } +// CHECK-LABEL: @xvsrlri_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlri.h(<16 x i16> [[_1]], i32 1) +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvsrlri_h(v16i16 _1) { return __lasx_xvsrlri_h(_1, 1); } +// CHECK-LABEL: @xvsrlri_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlri.w(<8 x i32> [[_1]], i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvsrlri_w(v8i32 _1) { return __lasx_xvsrlri_w(_1, 1); } +// CHECK-LABEL: @xvsrlri_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlri.d(<4 x i64> [[_1]], i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvsrlri_d(v4i64 _1) { return __lasx_xvsrlri_d(_1, 1); } +// CHECK-LABEL: @xvbitclr_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitclr.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32u8 xvbitclr_b(v32u8 _1, v32u8 _2) { return __lasx_xvbitclr_b(_1, _2); } +// CHECK-LABEL: @xvbitclr_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitclr.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16u16 xvbitclr_h(v16u16 _1, v16u16 _2) { return __lasx_xvbitclr_h(_1, _2); } 
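+// A minimal usage sketch (illustrative only, not part of the autogenerated
+// checks; it assumes nothing beyond the v32u8 typedef and the __lasx_*
+// aliases from <lasxintrin.h> exercised in this file): the immediate forms
+// take the bit index as a compile-time constant, so clearing and re-setting
+// bit 0 of every byte lane looks like this.
+static v32u8 toggle_bit0(v32u8 x) {
+  v32u8 cleared = __lasx_xvbitclri_b(x, 0); // clear bit 0 in each byte
+  return __lasx_xvbitseti_b(cleared, 0);    // set bit 0 back in each byte
+}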
+// CHECK-LABEL: @xvbitclr_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitclr.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8u32 xvbitclr_w(v8u32 _1, v8u32 _2) { return __lasx_xvbitclr_w(_1, _2); } +// CHECK-LABEL: @xvbitclr_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitclr.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4u64 xvbitclr_d(v4u64 _1, v4u64 _2) { return __lasx_xvbitclr_d(_1, _2); } +// CHECK-LABEL: @xvbitclri_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitclri.b(<32 x i8> [[_1]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32u8 xvbitclri_b(v32u8 _1) { return __lasx_xvbitclri_b(_1, 1); } +// CHECK-LABEL: @xvbitclri_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitclri.h(<16 x i16> [[_1]], i32 1) +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16u16 xvbitclri_h(v16u16 _1) { return __lasx_xvbitclri_h(_1, 1); } +// CHECK-LABEL: @xvbitclri_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitclri.w(<8 x i32> [[_1]], i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8u32 xvbitclri_w(v8u32 _1) { return __lasx_xvbitclri_w(_1, 1); } +// CHECK-LABEL: @xvbitclri_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitclri.d(<4 x i64> [[_1]], i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4u64 xvbitclri_d(v4u64 _1) { return __lasx_xvbitclri_d(_1, 1); } +// CHECK-LABEL: @xvbitset_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitset.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32u8 xvbitset_b(v32u8 _1, v32u8 _2) { return __lasx_xvbitset_b(_1, _2); } +// CHECK-LABEL: @xvbitset_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, 
!tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitset.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16u16 xvbitset_h(v16u16 _1, v16u16 _2) { return __lasx_xvbitset_h(_1, _2); } +// CHECK-LABEL: @xvbitset_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitset.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8u32 xvbitset_w(v8u32 _1, v8u32 _2) { return __lasx_xvbitset_w(_1, _2); } +// CHECK-LABEL: @xvbitset_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitset.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4u64 xvbitset_d(v4u64 _1, v4u64 _2) { return __lasx_xvbitset_d(_1, _2); } +// CHECK-LABEL: @xvbitseti_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitseti.b(<32 x i8> [[_1]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32u8 xvbitseti_b(v32u8 _1) { return __lasx_xvbitseti_b(_1, 1); } +// CHECK-LABEL: @xvbitseti_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitseti.h(<16 x i16> [[_1]], i32 1) +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16u16 xvbitseti_h(v16u16 _1) { return __lasx_xvbitseti_h(_1, 1); } +// CHECK-LABEL: @xvbitseti_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitseti.w(<8 x i32> [[_1]], i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8u32 xvbitseti_w(v8u32 _1) { return __lasx_xvbitseti_w(_1, 1); } +// CHECK-LABEL: @xvbitseti_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitseti.d(<4 x i64> [[_1]], i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4u64 xvbitseti_d(v4u64 _1) { return __lasx_xvbitseti_d(_1, 1); } +// CHECK-LABEL: @xvbitrev_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: 
[[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitrev.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32u8 xvbitrev_b(v32u8 _1, v32u8 _2) { return __lasx_xvbitrev_b(_1, _2); } +// CHECK-LABEL: @xvbitrev_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitrev.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16u16 xvbitrev_h(v16u16 _1, v16u16 _2) { return __lasx_xvbitrev_h(_1, _2); } +// CHECK-LABEL: @xvbitrev_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitrev.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8u32 xvbitrev_w(v8u32 _1, v8u32 _2) { return __lasx_xvbitrev_w(_1, _2); } +// CHECK-LABEL: @xvbitrev_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitrev.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4u64 xvbitrev_d(v4u64 _1, v4u64 _2) { return __lasx_xvbitrev_d(_1, _2); } +// CHECK-LABEL: @xvbitrevi_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitrevi.b(<32 x i8> [[_1]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32u8 xvbitrevi_b(v32u8 _1) { return __lasx_xvbitrevi_b(_1, 1); } +// CHECK-LABEL: @xvbitrevi_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitrevi.h(<16 x i16> [[_1]], i32 1) +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16u16 xvbitrevi_h(v16u16 _1) { return __lasx_xvbitrevi_h(_1, 1); } +// CHECK-LABEL: @xvbitrevi_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitrevi.w(<8 x i32> [[_1]], i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8u32 xvbitrevi_w(v8u32 _1) { return __lasx_xvbitrevi_w(_1, 1); } +// CHECK-LABEL: @xvbitrevi_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitrevi.d(<4 x i64> [[_1]], i32 1) +// 
CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4u64 xvbitrevi_d(v4u64 _1) { return __lasx_xvbitrevi_d(_1, 1); } +// CHECK-LABEL: @xvadd_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvadd.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvadd_b(v32i8 _1, v32i8 _2) { return __lasx_xvadd_b(_1, _2); } +// CHECK-LABEL: @xvadd_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvadd.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvadd_h(v16i16 _1, v16i16 _2) { return __lasx_xvadd_h(_1, _2); } +// CHECK-LABEL: @xvadd_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvadd.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvadd_w(v8i32 _1, v8i32 _2) { return __lasx_xvadd_w(_1, _2); } +// CHECK-LABEL: @xvadd_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadd.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvadd_d(v4i64 _1, v4i64 _2) { return __lasx_xvadd_d(_1, _2); } +// CHECK-LABEL: @xvaddi_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvaddi.bu(<32 x i8> [[_1]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvaddi_bu(v32i8 _1) { return __lasx_xvaddi_bu(_1, 1); } +// CHECK-LABEL: @xvaddi_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddi.hu(<16 x i16> [[_1]], i32 1) +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvaddi_hu(v16i16 _1) { return __lasx_xvaddi_hu(_1, 1); } +// CHECK-LABEL: @xvaddi_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddi.wu(<8 x i32> [[_1]], i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, 
!tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvaddi_wu(v8i32 _1) { return __lasx_xvaddi_wu(_1, 1); } +// CHECK-LABEL: @xvaddi_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddi.du(<4 x i64> [[_1]], i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvaddi_du(v4i64 _1) { return __lasx_xvaddi_du(_1, 1); } +// CHECK-LABEL: @xvsub_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsub.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvsub_b(v32i8 _1, v32i8 _2) { return __lasx_xvsub_b(_1, _2); } +// CHECK-LABEL: @xvsub_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsub.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvsub_h(v16i16 _1, v16i16 _2) { return __lasx_xvsub_h(_1, _2); } +// CHECK-LABEL: @xvsub_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsub.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvsub_w(v8i32 _1, v8i32 _2) { return __lasx_xvsub_w(_1, _2); } +// CHECK-LABEL: @xvsub_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsub.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvsub_d(v4i64 _1, v4i64 _2) { return __lasx_xvsub_d(_1, _2); } +// CHECK-LABEL: @xvsubi_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsubi.bu(<32 x i8> [[_1]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvsubi_bu(v32i8 _1) { return __lasx_xvsubi_bu(_1, 1); } +// CHECK-LABEL: @xvsubi_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubi.hu(<16 x i16> [[_1]], i32 1) +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvsubi_hu(v16i16 _1) { 
return __lasx_xvsubi_hu(_1, 1); } +// CHECK-LABEL: @xvsubi_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubi.wu(<8 x i32> [[_1]], i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvsubi_wu(v8i32 _1) { return __lasx_xvsubi_wu(_1, 1); } +// CHECK-LABEL: @xvsubi_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubi.du(<4 x i64> [[_1]], i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvsubi_du(v4i64 _1) { return __lasx_xvsubi_du(_1, 1); } +// CHECK-LABEL: @xvmax_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmax.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvmax_b(v32i8 _1, v32i8 _2) { return __lasx_xvmax_b(_1, _2); } +// CHECK-LABEL: @xvmax_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmax.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvmax_h(v16i16 _1, v16i16 _2) { return __lasx_xvmax_h(_1, _2); } +// CHECK-LABEL: @xvmax_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmax.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvmax_w(v8i32 _1, v8i32 _2) { return __lasx_xvmax_w(_1, _2); } +// CHECK-LABEL: @xvmax_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmax.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvmax_d(v4i64 _1, v4i64 _2) { return __lasx_xvmax_d(_1, _2); } +// CHECK-LABEL: @xvmaxi_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmaxi.b(<32 x i8> [[_1]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvmaxi_b(v32i8 _1) { return __lasx_xvmaxi_b(_1, 1); } +// CHECK-LABEL: @xvmaxi_h( +// CHECK-NEXT: entry: +// 
CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaxi.h(<16 x i16> [[_1]], i32 1) +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvmaxi_h(v16i16 _1) { return __lasx_xvmaxi_h(_1, 1); } +// CHECK-LABEL: @xvmaxi_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaxi.w(<8 x i32> [[_1]], i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvmaxi_w(v8i32 _1) { return __lasx_xvmaxi_w(_1, 1); } +// CHECK-LABEL: @xvmaxi_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaxi.d(<4 x i64> [[_1]], i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvmaxi_d(v4i64 _1) { return __lasx_xvmaxi_d(_1, 1); } +// CHECK-LABEL: @xvmax_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmax.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32u8 xvmax_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmax_bu(_1, _2); } +// CHECK-LABEL: @xvmax_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmax.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16u16 xvmax_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmax_hu(_1, _2); } +// CHECK-LABEL: @xvmax_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmax.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8u32 xvmax_wu(v8u32 _1, v8u32 _2) { return __lasx_xvmax_wu(_1, _2); } +// CHECK-LABEL: @xvmax_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmax.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4u64 xvmax_du(v4u64 _1, v4u64 _2) { return __lasx_xvmax_du(_1, _2); } +// CHECK-LABEL: @xvmaxi_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa 
[[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmaxi.bu(<32 x i8> [[_1]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32u8 xvmaxi_bu(v32u8 _1) { return __lasx_xvmaxi_bu(_1, 1); } +// CHECK-LABEL: @xvmaxi_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaxi.hu(<16 x i16> [[_1]], i32 1) +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16u16 xvmaxi_hu(v16u16 _1) { return __lasx_xvmaxi_hu(_1, 1); } +// CHECK-LABEL: @xvmaxi_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaxi.wu(<8 x i32> [[_1]], i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8u32 xvmaxi_wu(v8u32 _1) { return __lasx_xvmaxi_wu(_1, 1); } +// CHECK-LABEL: @xvmaxi_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaxi.du(<4 x i64> [[_1]], i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4u64 xvmaxi_du(v4u64 _1) { return __lasx_xvmaxi_du(_1, 1); } +// CHECK-LABEL: @xvmin_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmin.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvmin_b(v32i8 _1, v32i8 _2) { return __lasx_xvmin_b(_1, _2); } +// CHECK-LABEL: @xvmin_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmin.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvmin_h(v16i16 _1, v16i16 _2) { return __lasx_xvmin_h(_1, _2); } +// CHECK-LABEL: @xvmin_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmin.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvmin_w(v8i32 _1, v8i32 _2) { return __lasx_xvmin_w(_1, _2); } +// CHECK-LABEL: @xvmin_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmin.d(<4 x 
i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvmin_d(v4i64 _1, v4i64 _2) { return __lasx_xvmin_d(_1, _2); }
+// CHECK-LABEL: @xvmini_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmini.b(<32 x i8> [[_1]], i32 1)
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvmini_b(v32i8 _1) { return __lasx_xvmini_b(_1, 1); }
+// CHECK-LABEL: @xvmini_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmini.h(<16 x i16> [[_1]], i32 1)
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvmini_h(v16i16 _1) { return __lasx_xvmini_h(_1, 1); }
+// CHECK-LABEL: @xvmini_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmini.w(<8 x i32> [[_1]], i32 1)
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvmini_w(v8i32 _1) { return __lasx_xvmini_w(_1, 1); }
+// CHECK-LABEL: @xvmini_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmini.d(<4 x i64> [[_1]], i32 1)
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvmini_d(v4i64 _1) { return __lasx_xvmini_d(_1, 1); }
+// CHECK-LABEL: @xvmin_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmin.bu(<32 x i8> [[_124]], <32 x i8> [[_235]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32u8 xvmin_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmin_bu(_1, _2); }
+// CHECK-LABEL: @xvmin_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmin.hu(<16 x i16> [[_124]], <16 x i16> [[_235]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16u16 xvmin_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmin_hu(_1, _2); }
+// CHECK-LABEL: @xvmin_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmin.wu(<8 x i32> [[_124]], <8 x i32> [[_235]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8u32 xvmin_wu(v8u32 _1, v8u32 _2) { return __lasx_xvmin_wu(_1, _2); }
+// CHECK-LABEL: @xvmin_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmin.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4u64 xvmin_du(v4u64 _1, v4u64 _2) { return __lasx_xvmin_du(_1, _2); }
+// CHECK-LABEL: @xvmini_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmini.bu(<32 x i8> [[_1]], i32 1)
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32u8 xvmini_bu(v32u8 _1) { return __lasx_xvmini_bu(_1, 1); }
+// CHECK-LABEL: @xvmini_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmini.hu(<16 x i16> [[_1]], i32 1)
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16u16 xvmini_hu(v16u16 _1) { return __lasx_xvmini_hu(_1, 1); }
+// CHECK-LABEL: @xvmini_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmini.wu(<8 x i32> [[_1]], i32 1)
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8u32 xvmini_wu(v8u32 _1) { return __lasx_xvmini_wu(_1, 1); }
+// CHECK-LABEL: @xvmini_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmini.du(<4 x i64> [[_1]], i32 1)
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4u64 xvmini_du(v4u64 _1) { return __lasx_xvmini_du(_1, 1); }
+// CHECK-LABEL: @xvseq_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvseq.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvseq_b(v32i8 _1, v32i8 _2) { return __lasx_xvseq_b(_1, _2); }
+// CHECK-LABEL: @xvseq_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvseq.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvseq_h(v16i16 _1, v16i16 _2) { return __lasx_xvseq_h(_1, _2); }
+// CHECK-LABEL: @xvseq_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvseq.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvseq_w(v8i32 _1, v8i32 _2) { return __lasx_xvseq_w(_1, _2); }
+// CHECK-LABEL: @xvseq_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvseq.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvseq_d(v4i64 _1, v4i64 _2) { return __lasx_xvseq_d(_1, _2); }
+// CHECK-LABEL: @xvseqi_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvseqi.b(<32 x i8> [[_1]], i32 1)
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvseqi_b(v32i8 _1) { return __lasx_xvseqi_b(_1, 1); }
+// CHECK-LABEL: @xvseqi_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvseqi.h(<16 x i16> [[_1]], i32 1)
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvseqi_h(v16i16 _1) { return __lasx_xvseqi_h(_1, 1); }
+// CHECK-LABEL: @xvseqi_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvseqi.w(<8 x i32> [[_1]], i32 1)
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvseqi_w(v8i32 _1) { return __lasx_xvseqi_w(_1, 1); }
+// CHECK-LABEL: @xvseqi_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvseqi.d(<4 x i64> [[_1]], i32 1)
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvseqi_d(v4i64 _1) { return __lasx_xvseqi_d(_1, 1); }
+// CHECK-LABEL: @xvslt_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslt.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvslt_b(v32i8 _1, v32i8 _2) { return __lasx_xvslt_b(_1, _2); }
+// CHECK-LABEL: @xvslt_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslt.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvslt_h(v16i16 _1, v16i16 _2) { return __lasx_xvslt_h(_1, _2); }
+// CHECK-LABEL: @xvslt_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslt.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvslt_w(v8i32 _1, v8i32 _2) { return __lasx_xvslt_w(_1, _2); }
+// CHECK-LABEL: @xvslt_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslt.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvslt_d(v4i64 _1, v4i64 _2) { return __lasx_xvslt_d(_1, _2); }
+// CHECK-LABEL: @xvslti_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslti.b(<32 x i8> [[_1]], i32 1)
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvslti_b(v32i8 _1) { return __lasx_xvslti_b(_1, 1); }
+// CHECK-LABEL: @xvslti_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslti.h(<16 x i16> [[_1]], i32 1)
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvslti_h(v16i16 _1) { return __lasx_xvslti_h(_1, 1); }
+// CHECK-LABEL: @xvslti_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslti.w(<8 x i32> [[_1]], i32 1)
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvslti_w(v8i32 _1) { return __lasx_xvslti_w(_1, 1); }
+// CHECK-LABEL: @xvslti_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslti.d(<4 x i64> [[_1]], i32 1)
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvslti_d(v4i64 _1) { return __lasx_xvslti_d(_1, 1); }
+// CHECK-LABEL: @xvslt_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslt.bu(<32 x i8> [[_124]], <32 x i8> [[_235]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvslt_bu(v32u8 _1, v32u8 _2) { return __lasx_xvslt_bu(_1, _2); }
+// CHECK-LABEL: @xvslt_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslt.hu(<16 x i16> [[_124]], <16 x i16> [[_235]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvslt_hu(v16u16 _1, v16u16 _2) { return __lasx_xvslt_hu(_1, _2); }
+// CHECK-LABEL: @xvslt_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslt.wu(<8 x i32> [[_124]], <8 x i32> [[_235]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvslt_wu(v8u32 _1, v8u32 _2) { return __lasx_xvslt_wu(_1, _2); }
+// CHECK-LABEL: @xvslt_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslt.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvslt_du(v4u64 _1, v4u64 _2) { return __lasx_xvslt_du(_1, _2); }
+// CHECK-LABEL: @xvslti_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslti.bu(<32 x i8> [[_1]], i32 1)
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvslti_bu(v32u8 _1) { return __lasx_xvslti_bu(_1, 1); }
+// CHECK-LABEL: @xvslti_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslti.hu(<16 x i16> [[_1]], i32 1)
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvslti_hu(v16u16 _1) { return __lasx_xvslti_hu(_1, 1); }
+// CHECK-LABEL: @xvslti_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslti.wu(<8 x i32> [[_1]], i32 1)
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvslti_wu(v8u32 _1) { return __lasx_xvslti_wu(_1, 1); }
+// CHECK-LABEL: @xvslti_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslti.du(<4 x i64> [[_1]], i32 1)
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvslti_du(v4u64 _1) { return __lasx_xvslti_du(_1, 1); }
+// CHECK-LABEL: @xvsle_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsle.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvsle_b(v32i8 _1, v32i8 _2) { return __lasx_xvsle_b(_1, _2); }
+// CHECK-LABEL: @xvsle_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsle.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvsle_h(v16i16 _1, v16i16 _2) { return __lasx_xvsle_h(_1, _2); }
+// CHECK-LABEL: @xvsle_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsle.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvsle_w(v8i32 _1, v8i32 _2) { return __lasx_xvsle_w(_1, _2); }
+// CHECK-LABEL: @xvsle_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsle.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvsle_d(v4i64 _1, v4i64 _2) { return __lasx_xvsle_d(_1, _2); }
+// CHECK-LABEL: @xvslei_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslei.b(<32 x i8> [[_1]], i32 1)
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvslei_b(v32i8 _1) { return __lasx_xvslei_b(_1, 1); }
+// CHECK-LABEL: @xvslei_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslei.h(<16 x i16> [[_1]], i32 1)
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvslei_h(v16i16 _1) { return __lasx_xvslei_h(_1, 1); }
+// CHECK-LABEL: @xvslei_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslei.w(<8 x i32> [[_1]], i32 1)
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvslei_w(v8i32 _1) { return __lasx_xvslei_w(_1, 1); }
+// CHECK-LABEL: @xvslei_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslei.d(<4 x i64> [[_1]], i32 1)
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvslei_d(v4i64 _1) { return __lasx_xvslei_d(_1, 1); }
+// CHECK-LABEL: @xvsle_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsle.bu(<32 x i8> [[_124]], <32 x i8> [[_235]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvsle_bu(v32u8 _1, v32u8 _2) { return __lasx_xvsle_bu(_1, _2); }
+// CHECK-LABEL: @xvsle_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsle.hu(<16 x i16> [[_124]], <16 x i16> [[_235]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvsle_hu(v16u16 _1, v16u16 _2) { return __lasx_xvsle_hu(_1, _2); }
+// CHECK-LABEL: @xvsle_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsle.wu(<8 x i32> [[_124]], <8 x i32> [[_235]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvsle_wu(v8u32 _1, v8u32 _2) { return __lasx_xvsle_wu(_1, _2); }
+// CHECK-LABEL: @xvsle_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsle.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvsle_du(v4u64 _1, v4u64 _2) { return __lasx_xvsle_du(_1, _2); }
+// CHECK-LABEL: @xvslei_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslei.bu(<32 x i8> [[_1]], i32 1)
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvslei_bu(v32u8 _1) { return __lasx_xvslei_bu(_1, 1); }
+// CHECK-LABEL: @xvslei_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslei.hu(<16 x i16> [[_1]], i32 1)
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvslei_hu(v16u16 _1) { return __lasx_xvslei_hu(_1, 1); }
+// CHECK-LABEL: @xvslei_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslei.wu(<8 x i32> [[_1]], i32 1)
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvslei_wu(v8u32 _1) { return __lasx_xvslei_wu(_1, 1); }
+// CHECK-LABEL: @xvslei_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslei.du(<4 x i64> [[_1]], i32 1)
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvslei_du(v4u64 _1) { return __lasx_xvslei_du(_1, 1); }
+// CHECK-LABEL: @xvsat_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsat.b(<32 x i8> [[_1]], i32 1)
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvsat_b(v32i8 _1) { return __lasx_xvsat_b(_1, 1); }
+// CHECK-LABEL: @xvsat_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsat.h(<16 x i16> [[_1]], i32 1)
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvsat_h(v16i16 _1) { return __lasx_xvsat_h(_1, 1); }
+// CHECK-LABEL: @xvsat_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsat.w(<8 x i32> [[_1]], i32 1)
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvsat_w(v8i32 _1) { return __lasx_xvsat_w(_1, 1); }
+// CHECK-LABEL: @xvsat_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsat.d(<4 x i64> [[_1]], i32 1)
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvsat_d(v4i64 _1) { return __lasx_xvsat_d(_1, 1); }
+// CHECK-LABEL: @xvsat_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsat.bu(<32 x i8> [[_1]], i32 1)
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32u8 xvsat_bu(v32u8 _1) { return __lasx_xvsat_bu(_1, 1); }
+// CHECK-LABEL: @xvsat_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsat.hu(<16 x i16> [[_1]], i32 1)
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16u16 xvsat_hu(v16u16 _1) { return __lasx_xvsat_hu(_1, 1); }
+// CHECK-LABEL: @xvsat_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsat.wu(<8 x i32> [[_1]], i32 1)
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8u32 xvsat_wu(v8u32 _1) { return __lasx_xvsat_wu(_1, 1); }
+// CHECK-LABEL: @xvsat_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsat.du(<4 x i64> [[_1]], i32 1)
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4u64 xvsat_du(v4u64 _1) { return __lasx_xvsat_du(_1, 1); }
+// CHECK-LABEL: @xvadda_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvadda.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvadda_b(v32i8 _1, v32i8 _2) { return __lasx_xvadda_b(_1, _2); }
+// CHECK-LABEL: @xvadda_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvadda.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvadda_h(v16i16 _1, v16i16 _2) { return __lasx_xvadda_h(_1, _2); }
+// CHECK-LABEL: @xvadda_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvadda.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvadda_w(v8i32 _1, v8i32 _2) { return __lasx_xvadda_w(_1, _2); }
+// CHECK-LABEL: @xvadda_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadda.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvadda_d(v4i64 _1, v4i64 _2) { return __lasx_xvadda_d(_1, _2); }
+// CHECK-LABEL: @xvsadd_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsadd.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvsadd_b(v32i8 _1, v32i8 _2) { return __lasx_xvsadd_b(_1, _2); }
+// CHECK-LABEL: @xvsadd_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsadd.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvsadd_h(v16i16 _1, v16i16 _2) { return __lasx_xvsadd_h(_1, _2); }
+// CHECK-LABEL: @xvsadd_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsadd.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvsadd_w(v8i32 _1, v8i32 _2) { return __lasx_xvsadd_w(_1, _2); }
+// CHECK-LABEL: @xvsadd_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsadd.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvsadd_d(v4i64 _1, v4i64 _2) { return __lasx_xvsadd_d(_1, _2); }
+// CHECK-LABEL: @xvsadd_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsadd.bu(<32 x i8> [[_124]], <32 x i8> [[_235]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32u8 xvsadd_bu(v32u8 _1, v32u8 _2) { return __lasx_xvsadd_bu(_1, _2); }
+// CHECK-LABEL: @xvsadd_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsadd.hu(<16 x i16> [[_124]], <16 x i16> [[_235]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16u16 xvsadd_hu(v16u16 _1, v16u16 _2) { return __lasx_xvsadd_hu(_1, _2); }
+// CHECK-LABEL: @xvsadd_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsadd.wu(<8 x i32> [[_124]], <8 x i32> [[_235]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8u32 xvsadd_wu(v8u32 _1, v8u32 _2) { return __lasx_xvsadd_wu(_1, _2); }
+// CHECK-LABEL: @xvsadd_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsadd.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4u64 xvsadd_du(v4u64 _1, v4u64 _2) { return __lasx_xvsadd_du(_1, _2); }
+// CHECK-LABEL: @xvavg_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavg.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvavg_b(v32i8 _1, v32i8 _2) { return __lasx_xvavg_b(_1, _2); }
+// CHECK-LABEL: @xvavg_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavg.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvavg_h(v16i16 _1, v16i16 _2) { return __lasx_xvavg_h(_1, _2); }
+// CHECK-LABEL: @xvavg_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavg.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvavg_w(v8i32 _1, v8i32 _2) { return __lasx_xvavg_w(_1, _2); }
+// CHECK-LABEL: @xvavg_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavg.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvavg_d(v4i64 _1, v4i64 _2) { return __lasx_xvavg_d(_1, _2); }
+// CHECK-LABEL: @xvavg_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavg.bu(<32 x i8> [[_124]], <32 x i8> [[_235]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32u8 xvavg_bu(v32u8 _1, v32u8 _2) { return __lasx_xvavg_bu(_1, _2); }
+// CHECK-LABEL: @xvavg_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavg.hu(<16 x i16> [[_124]], <16 x i16> [[_235]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16u16 xvavg_hu(v16u16 _1, v16u16 _2) { return __lasx_xvavg_hu(_1, _2); }
+// CHECK-LABEL: @xvavg_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavg.wu(<8 x i32> [[_124]], <8 x i32> [[_235]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8u32 xvavg_wu(v8u32 _1, v8u32 _2) { return __lasx_xvavg_wu(_1, _2); }
+// CHECK-LABEL: @xvavg_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavg.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4u64 xvavg_du(v4u64 _1, v4u64 _2) { return __lasx_xvavg_du(_1, _2); }
+// CHECK-LABEL: @xvavgr_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavgr.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvavgr_b(v32i8 _1, v32i8 _2) { return __lasx_xvavgr_b(_1, _2); }
+// CHECK-LABEL: @xvavgr_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavgr.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvavgr_h(v16i16 _1, v16i16 _2) { return __lasx_xvavgr_h(_1, _2); }
+// CHECK-LABEL: @xvavgr_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavgr.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvavgr_w(v8i32 _1, v8i32 _2) { return __lasx_xvavgr_w(_1, _2); }
+// CHECK-LABEL: @xvavgr_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavgr.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvavgr_d(v4i64 _1, v4i64 _2) { return __lasx_xvavgr_d(_1, _2); }
+// CHECK-LABEL: @xvavgr_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavgr.bu(<32 x i8> [[_124]], <32 x i8> [[_235]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32u8 xvavgr_bu(v32u8 _1, v32u8 _2) { return __lasx_xvavgr_bu(_1, _2); }
+// CHECK-LABEL: @xvavgr_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavgr.hu(<16 x i16> [[_124]], <16 x i16> [[_235]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16u16 xvavgr_hu(v16u16 _1, v16u16 _2) { return __lasx_xvavgr_hu(_1, _2); }
+// CHECK-LABEL: @xvavgr_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavgr.wu(<8 x i32> [[_124]], <8 x i32> [[_235]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8u32 xvavgr_wu(v8u32 _1, v8u32 _2) { return __lasx_xvavgr_wu(_1, _2); }
+// CHECK-LABEL: @xvavgr_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavgr.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4u64 xvavgr_du(v4u64 _1, v4u64 _2) { return __lasx_xvavgr_du(_1, _2); }
+// CHECK-LABEL: @xvssub_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssub.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvssub_b(v32i8 _1, v32i8 _2) { return __lasx_xvssub_b(_1, _2); }
+// CHECK-LABEL: @xvssub_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssub.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvssub_h(v16i16 _1, v16i16 _2) { return __lasx_xvssub_h(_1, _2); }
+// CHECK-LABEL: @xvssub_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssub.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvssub_w(v8i32 _1, v8i32 _2) { return __lasx_xvssub_w(_1, _2); }
+// CHECK-LABEL: @xvssub_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssub.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvssub_d(v4i64 _1, v4i64 _2) { return __lasx_xvssub_d(_1, _2); }
+// CHECK-LABEL: @xvssub_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssub.bu(<32 x i8> [[_124]], <32 x i8> [[_235]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32u8 xvssub_bu(v32u8 _1, v32u8 _2) { return __lasx_xvssub_bu(_1, _2); }
+// CHECK-LABEL: @xvssub_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssub.hu(<16 x i16> [[_124]], <16 x i16> [[_235]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16u16 xvssub_hu(v16u16 _1, v16u16 _2) { return __lasx_xvssub_hu(_1, _2); }
+// CHECK-LABEL: @xvssub_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssub.wu(<8 x i32> [[_124]], <8 x i32> [[_235]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8u32 xvssub_wu(v8u32 _1, v8u32 _2) { return __lasx_xvssub_wu(_1, _2); }
+// CHECK-LABEL: @xvssub_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssub.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4u64 xvssub_du(v4u64 _1, v4u64 _2) { return __lasx_xvssub_du(_1, _2); }
+// CHECK-LABEL: @xvabsd_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvabsd.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvabsd_b(v32i8 _1, v32i8 _2) { return __lasx_xvabsd_b(_1, _2); }
+// CHECK-LABEL: @xvabsd_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvabsd.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvabsd_h(v16i16 _1, v16i16 _2) { return __lasx_xvabsd_h(_1, _2); }
+// CHECK-LABEL: @xvabsd_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvabsd.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvabsd_w(v8i32 _1, v8i32 _2) { return __lasx_xvabsd_w(_1, _2); }
+// CHECK-LABEL: @xvabsd_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvabsd.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvabsd_d(v4i64 _1, v4i64 _2) { return __lasx_xvabsd_d(_1, _2); }
+// CHECK-LABEL: @xvabsd_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvabsd.bu(<32 x i8> [[_124]], <32 x i8> [[_235]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32u8 xvabsd_bu(v32u8 _1, v32u8 _2) { return __lasx_xvabsd_bu(_1, _2); }
+// CHECK-LABEL: @xvabsd_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvabsd.hu(<16 x i16> [[_124]], <16 x i16> [[_235]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16u16 xvabsd_hu(v16u16 _1, v16u16 _2) { return __lasx_xvabsd_hu(_1, _2); }
+// CHECK-LABEL: @xvabsd_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvabsd.wu(<8 x i32> [[_124]], <8 x i32> [[_235]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8u32 xvabsd_wu(v8u32 _1, v8u32 _2) { return __lasx_xvabsd_wu(_1, _2); }
+// CHECK-LABEL: @xvabsd_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvabsd.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4u64 xvabsd_du(v4u64 _1, v4u64 _2) { return __lasx_xvabsd_du(_1, _2); }
+// CHECK-LABEL: @xvmul_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmul.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvmul_b(v32i8 _1, v32i8 _2) { return __lasx_xvmul_b(_1, _2); }
+// CHECK-LABEL: @xvmul_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmul.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvmul_h(v16i16 _1, v16i16 _2) { return __lasx_xvmul_h(_1, _2); }
+// CHECK-LABEL: @xvmul_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmul.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvmul_w(v8i32 _1, v8i32 _2) { return __lasx_xvmul_w(_1, _2); }
+// CHECK-LABEL: @xvmul_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmul.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvmul_d(v4i64 _1, v4i64 _2) { return __lasx_xvmul_d(_1, _2); }
+// CHECK-LABEL: @xvmadd_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_136:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmadd.b(<32 x i8> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]])
+// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvmadd_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __lasx_xvmadd_b(_1, _2, _3); }
+// CHECK-LABEL: @xvmadd_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmadd.h(<16 x i16> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]])
+// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvmadd_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __lasx_xvmadd_h(_1, _2, _3); }
+// CHECK-LABEL: @xvmadd_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_247:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_358:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmadd.w(<8 x i32> [[_136]], <8 x i32> [[_247]], <8 x i32> [[_358]])
+// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvmadd_w(v8i32 _1, v8i32 _2, v8i32 _3) { return __lasx_xvmadd_w(_1, _2, _3); }
+// CHECK-LABEL: @xvmadd_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmadd.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]])
+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvmadd_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __lasx_xvmadd_d(_1, _2, _3); }
+// CHECK-LABEL: @xvmsub_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_136:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmsub.b(<32 x i8> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]])
+// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvmsub_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __lasx_xvmsub_b(_1, _2, _3); }
+// CHECK-LABEL: @xvmsub_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmsub.h(<16 x i16> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]])
+// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvmsub_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __lasx_xvmsub_h(_1, _2, _3); }
+// CHECK-LABEL: @xvmsub_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_247:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_358:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmsub.w(<8 x i32> [[_136]], <8 x i32> [[_247]], <8 x i32> [[_358]])
+// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvmsub_w(v8i32 _1, v8i32 _2, v8i32 _3) { return __lasx_xvmsub_w(_1, _2, _3); }
+// CHECK-LABEL: @xvmsub_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmsub.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]])
+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvmsub_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __lasx_xvmsub_d(_1, _2, _3); }
+// CHECK-LABEL: @xvdiv_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvdiv.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvdiv_b(v32i8 _1, v32i8 _2) { return __lasx_xvdiv_b(_1, _2); }
+// CHECK-LABEL: @xvdiv_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvdiv.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvdiv_h(v16i16 _1, v16i16 _2) { return __lasx_xvdiv_h(_1, _2); }
+// CHECK-LABEL: @xvdiv_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvdiv.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvdiv_w(v8i32 _1, v8i32 _2) { return __lasx_xvdiv_w(_1, _2); }
+// CHECK-LABEL: @xvdiv_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvdiv.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvdiv_d(v4i64 _1, v4i64 _2) { return __lasx_xvdiv_d(_1, _2); }
+// CHECK-LABEL: @xvdiv_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvdiv.bu(<32 x i8> [[_124]], <32 x i8> [[_235]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32u8 xvdiv_bu(v32u8 _1, v32u8 _2) { return __lasx_xvdiv_bu(_1, _2); }
+// CHECK-LABEL: @xvdiv_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvdiv.hu(<16 x i16> [[_124]], <16 x i16> [[_235]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16u16 xvdiv_hu(v16u16 _1, v16u16 _2) { return __lasx_xvdiv_hu(_1, _2); }
+// CHECK-LABEL: @xvdiv_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvdiv.wu(<8 x i32> [[_124]], <8 x i32> [[_235]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8u32 xvdiv_wu(v8u32 _1, v8u32 _2) { return __lasx_xvdiv_wu(_1, _2); }
+// CHECK-LABEL: @xvdiv_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvdiv.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4u64 xvdiv_du(v4u64 _1, v4u64 _2) { return __lasx_xvdiv_du(_1, _2); }
+// CHECK-LABEL: @xvhaddw_h_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhaddw.h.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvhaddw_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvhaddw_h_b(_1, _2); }
+// CHECK-LABEL: @xvhaddw_w_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhaddw.w.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvhaddw_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvhaddw_w_h(_1, _2); }
+// CHECK-LABEL: @xvhaddw_d_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.d.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvhaddw_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvhaddw_d_w(_1, _2); }
+// CHECK-LABEL: @xvhaddw_hu_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhaddw.hu.bu(<32 x i8> [[_124]], <32 x i8> [[_235]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16u16 xvhaddw_hu_bu(v32u8 _1, v32u8 _2) { return __lasx_xvhaddw_hu_bu(_1, _2); }
+// CHECK-LABEL: @xvhaddw_wu_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhaddw.wu.hu(<16 x i16> [[_124]], <16 x i16> [[_235]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8u32 xvhaddw_wu_hu(v16u16 _1, v16u16 _2) { return __lasx_xvhaddw_wu_hu(_1, _2); }
+// CHECK-LABEL: @xvhaddw_du_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.du.wu(<8 x i32> [[_124]], <8 x i32> [[_235]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4u64 xvhaddw_du_wu(v8u32 _1, v8u32 _2) { return __lasx_xvhaddw_du_wu(_1, _2); }
+// CHECK-LABEL: @xvhsubw_h_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhsubw.h.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvhsubw_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvhsubw_h_b(_1, _2); }
+// CHECK-LABEL: @xvhsubw_w_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhsubw.w.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvhsubw_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvhsubw_w_h(_1, _2); }
+// CHECK-LABEL: @xvhsubw_d_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.d.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvhsubw_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvhsubw_d_w(_1, _2); }
+// CHECK-LABEL: @xvhsubw_hu_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhsubw.hu.bu(<32 x i8> [[_124]], <32 x i8> [[_235]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvhsubw_hu_bu(v32u8 _1, v32u8 _2) { return __lasx_xvhsubw_hu_bu(_1, _2); }
+// CHECK-LABEL: @xvhsubw_wu_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhsubw.wu.hu(<16 x i16> [[_124]], <16 x i16> [[_235]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvhsubw_wu_hu(v16u16
_1, v16u16 _2) { return __lasx_xvhsubw_wu_hu(_1, _2); } +// CHECK-LABEL: @xvhsubw_du_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.du.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvhsubw_du_wu(v8u32 _1, v8u32 _2) { return __lasx_xvhsubw_du_wu(_1, _2); } +// CHECK-LABEL: @xvmod_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmod.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvmod_b(v32i8 _1, v32i8 _2) { return __lasx_xvmod_b(_1, _2); } +// CHECK-LABEL: @xvmod_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmod.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvmod_h(v16i16 _1, v16i16 _2) { return __lasx_xvmod_h(_1, _2); } +// CHECK-LABEL: @xvmod_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmod.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvmod_w(v8i32 _1, v8i32 _2) { return __lasx_xvmod_w(_1, _2); } +// CHECK-LABEL: @xvmod_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmod.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvmod_d(v4i64 _1, v4i64 _2) { return __lasx_xvmod_d(_1, _2); } +// CHECK-LABEL: @xvmod_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmod.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32u8 xvmod_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmod_bu(_1, _2); } +// CHECK-LABEL: @xvmod_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], 
align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmod.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16u16 xvmod_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmod_hu(_1, _2); } +// CHECK-LABEL: @xvmod_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmod.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8u32 xvmod_wu(v8u32 _1, v8u32 _2) { return __lasx_xvmod_wu(_1, _2); } +// CHECK-LABEL: @xvmod_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmod.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4u64 xvmod_du(v4u64 _1, v4u64 _2) { return __lasx_xvmod_du(_1, _2); } +// CHECK-LABEL: @xvrepl128vei_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrepl128vei.b(<32 x i8> [[_1]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvrepl128vei_b(v32i8 _1) { return __lasx_xvrepl128vei_b(_1, 1); } +// CHECK-LABEL: @xvrepl128vei_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrepl128vei.h(<16 x i16> [[_1]], i32 1) +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvrepl128vei_h(v16i16 _1) { return __lasx_xvrepl128vei_h(_1, 1); } +// CHECK-LABEL: @xvrepl128vei_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrepl128vei.w(<8 x i32> [[_1]], i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvrepl128vei_w(v8i32 _1) { return __lasx_xvrepl128vei_w(_1, 1); } +// CHECK-LABEL: @xvrepl128vei_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrepl128vei.d(<4 x i64> [[_1]], i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvrepl128vei_d(v4i64 _1) { return __lasx_xvrepl128vei_d(_1, 1); } +// CHECK-LABEL: @xvpickev_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> 
@llvm.loongarch.lasx.xvpickev.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvpickev_b(v32i8 _1, v32i8 _2) { return __lasx_xvpickev_b(_1, _2); } +// CHECK-LABEL: @xvpickev_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpickev.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvpickev_h(v16i16 _1, v16i16 _2) { return __lasx_xvpickev_h(_1, _2); } +// CHECK-LABEL: @xvpickev_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickev.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvpickev_w(v8i32 _1, v8i32 _2) { return __lasx_xvpickev_w(_1, _2); } +// CHECK-LABEL: @xvpickev_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickev.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvpickev_d(v4i64 _1, v4i64 _2) { return __lasx_xvpickev_d(_1, _2); } +// CHECK-LABEL: @xvpickod_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpickod.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvpickod_b(v32i8 _1, v32i8 _2) { return __lasx_xvpickod_b(_1, _2); } +// CHECK-LABEL: @xvpickod_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpickod.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvpickod_h(v16i16 _1, v16i16 _2) { return __lasx_xvpickod_h(_1, _2); } +// CHECK-LABEL: @xvpickod_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickod.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 
xvpickod_w(v8i32 _1, v8i32 _2) { return __lasx_xvpickod_w(_1, _2); } +// CHECK-LABEL: @xvpickod_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickod.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvpickod_d(v4i64 _1, v4i64 _2) { return __lasx_xvpickod_d(_1, _2); } +// CHECK-LABEL: @xvilvh_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvilvh.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvilvh_b(v32i8 _1, v32i8 _2) { return __lasx_xvilvh_b(_1, _2); } +// CHECK-LABEL: @xvilvh_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvilvh.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvilvh_h(v16i16 _1, v16i16 _2) { return __lasx_xvilvh_h(_1, _2); } +// CHECK-LABEL: @xvilvh_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvilvh.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvilvh_w(v8i32 _1, v8i32 _2) { return __lasx_xvilvh_w(_1, _2); } +// CHECK-LABEL: @xvilvh_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvilvh.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvilvh_d(v4i64 _1, v4i64 _2) { return __lasx_xvilvh_d(_1, _2); } +// CHECK-LABEL: @xvilvl_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvilvl.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvilvl_b(v32i8 _1, v32i8 _2) { return __lasx_xvilvl_b(_1, _2); } +// CHECK-LABEL: @xvilvl_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr 
[[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvilvl.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvilvl_h(v16i16 _1, v16i16 _2) { return __lasx_xvilvl_h(_1, _2); } +// CHECK-LABEL: @xvilvl_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvilvl.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvilvl_w(v8i32 _1, v8i32 _2) { return __lasx_xvilvl_w(_1, _2); } +// CHECK-LABEL: @xvilvl_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvilvl.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvilvl_d(v4i64 _1, v4i64 _2) { return __lasx_xvilvl_d(_1, _2); } +// CHECK-LABEL: @xvpackev_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpackev.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvpackev_b(v32i8 _1, v32i8 _2) { return __lasx_xvpackev_b(_1, _2); } +// CHECK-LABEL: @xvpackev_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpackev.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvpackev_h(v16i16 _1, v16i16 _2) { return __lasx_xvpackev_h(_1, _2); } +// CHECK-LABEL: @xvpackev_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpackev.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvpackev_w(v8i32 _1, v8i32 _2) { return __lasx_xvpackev_w(_1, _2); } +// CHECK-LABEL: @xvpackev_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpackev.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa 
[[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvpackev_d(v4i64 _1, v4i64 _2) { return __lasx_xvpackev_d(_1, _2); } +// CHECK-LABEL: @xvpackod_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpackod.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvpackod_b(v32i8 _1, v32i8 _2) { return __lasx_xvpackod_b(_1, _2); } +// CHECK-LABEL: @xvpackod_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpackod.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvpackod_h(v16i16 _1, v16i16 _2) { return __lasx_xvpackod_h(_1, _2); } +// CHECK-LABEL: @xvpackod_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpackod.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvpackod_w(v8i32 _1, v8i32 _2) { return __lasx_xvpackod_w(_1, _2); } +// CHECK-LABEL: @xvpackod_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpackod.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvpackod_d(v4i64 _1, v4i64 _2) { return __lasx_xvpackod_d(_1, _2); } +// CHECK-LABEL: @xvshuf_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_136:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvshuf.b(<32 x i8> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]]) +// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvshuf_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __lasx_xvshuf_b(_1, _2, _3); } +// CHECK-LABEL: @xvshuf_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvshuf.h(<16 x i16> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]]) +// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 
32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvshuf_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __lasx_xvshuf_h(_1, _2, _3); } +// CHECK-LABEL: @xvshuf_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_247:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_358:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvshuf.w(<8 x i32> [[_136]], <8 x i32> [[_247]], <8 x i32> [[_358]]) +// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvshuf_w(v8i32 _1, v8i32 _2, v8i32 _3) { return __lasx_xvshuf_w(_1, _2, _3); } +// CHECK-LABEL: @xvshuf_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvshuf.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]]) +// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvshuf_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __lasx_xvshuf_d(_1, _2, _3); } +// CHECK-LABEL: @xvand_v( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvand.v(<32 x i8> [[_124]], <32 x i8> [[_235]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32u8 xvand_v(v32u8 _1, v32u8 _2) { return __lasx_xvand_v(_1, _2); } +// CHECK-LABEL: @xvandi_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvandi.b(<32 x i8> [[_1]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32u8 xvandi_b(v32u8 _1) { return __lasx_xvandi_b(_1, 1); } +// CHECK-LABEL: @xvor_v( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvor.v(<32 x i8> [[_124]], <32 x i8> [[_235]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32u8 xvor_v(v32u8 _1, v32u8 _2) { return __lasx_xvor_v(_1, _2); } +// CHECK-LABEL: @xvori_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvori.b(<32 x i8> [[_1]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32u8 xvori_b(v32u8 _1) { return __lasx_xvori_b(_1, 1); } +// CHECK-LABEL: @xvnor_v( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// 
CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvnor.v(<32 x i8> [[_124]], <32 x i8> [[_235]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32u8 xvnor_v(v32u8 _1, v32u8 _2) { return __lasx_xvnor_v(_1, _2); } +// CHECK-LABEL: @xvnori_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvnori.b(<32 x i8> [[_1]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32u8 xvnori_b(v32u8 _1) { return __lasx_xvnori_b(_1, 1); } +// CHECK-LABEL: @xvxor_v( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvxor.v(<32 x i8> [[_124]], <32 x i8> [[_235]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32u8 xvxor_v(v32u8 _1, v32u8 _2) { return __lasx_xvxor_v(_1, _2); } +// CHECK-LABEL: @xvxori_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvxori.b(<32 x i8> [[_1]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32u8 xvxori_b(v32u8 _1) { return __lasx_xvxori_b(_1, 1); } +// CHECK-LABEL: @xvbitsel_v( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_136:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitsel.v(<32 x i8> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]]) +// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32u8 xvbitsel_v(v32u8 _1, v32u8 _2, v32u8 _3) { return __lasx_xvbitsel_v(_1, _2, _3); } +// CHECK-LABEL: @xvbitseli_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitseli.b(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32u8 xvbitseli_b(v32u8 _1, v32u8 _2) { return __lasx_xvbitseli_b(_1, _2, 1); } +// CHECK-LABEL: @xvshuf4i_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvshuf4i.b(<32 x i8> [[_1]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvshuf4i_b(v32i8 _1) { return __lasx_xvshuf4i_b(_1, 1); } +// CHECK-LABEL: @xvshuf4i_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] 
= load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvshuf4i.h(<16 x i16> [[_1]], i32 1) +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvshuf4i_h(v16i16 _1) { return __lasx_xvshuf4i_h(_1, 1); } +// CHECK-LABEL: @xvshuf4i_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvshuf4i.w(<8 x i32> [[_1]], i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvshuf4i_w(v8i32 _1) { return __lasx_xvshuf4i_w(_1, 1); } +// CHECK-LABEL: @xvreplgr2vr_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplgr2vr.b(i32 [[_1:%.*]]) +// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvreplgr2vr_b(int _1) { return __lasx_xvreplgr2vr_b(_1); } +// CHECK-LABEL: @xvreplgr2vr_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplgr2vr.h(i32 [[_1:%.*]]) +// CHECK-NEXT: store <16 x i16> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvreplgr2vr_h(int _1) { return __lasx_xvreplgr2vr_h(_1); } +// CHECK-LABEL: @xvreplgr2vr_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplgr2vr.w(i32 [[_1:%.*]]) +// CHECK-NEXT: store <8 x i32> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvreplgr2vr_w(int _1) { return __lasx_xvreplgr2vr_w(_1); } +// CHECK-LABEL: @xvreplgr2vr_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[CONV:%.*]] = sext i32 [[_1:%.*]] to i64 +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplgr2vr.d(i64 [[CONV]]) +// CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvreplgr2vr_d(int _1) { return __lasx_xvreplgr2vr_d(_1); } +// CHECK-LABEL: @xvpcnt_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpcnt.b(<32 x i8> [[_112]]) +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvpcnt_b(v32i8 _1) { return __lasx_xvpcnt_b(_1); } +// CHECK-LABEL: @xvpcnt_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpcnt.h(<16 x i16> [[_112]]) +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvpcnt_h(v16i16 _1) { return __lasx_xvpcnt_h(_1); } +// CHECK-LABEL: @xvpcnt_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpcnt.w(<8 x i32> [[_112]]) +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvpcnt_w(v8i32 _1) { return __lasx_xvpcnt_w(_1); } +// CHECK-LABEL: @xvpcnt_d( +// CHECK-NEXT: 
entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpcnt.d(<4 x i64> [[_1]]) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvpcnt_d(v4i64 _1) { return __lasx_xvpcnt_d(_1); } +// CHECK-LABEL: @xvclo_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvclo.b(<32 x i8> [[_112]]) +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvclo_b(v32i8 _1) { return __lasx_xvclo_b(_1); } +// CHECK-LABEL: @xvclo_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvclo.h(<16 x i16> [[_112]]) +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvclo_h(v16i16 _1) { return __lasx_xvclo_h(_1); } +// CHECK-LABEL: @xvclo_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvclo.w(<8 x i32> [[_112]]) +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvclo_w(v8i32 _1) { return __lasx_xvclo_w(_1); } +// CHECK-LABEL: @xvclo_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvclo.d(<4 x i64> [[_1]]) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvclo_d(v4i64 _1) { return __lasx_xvclo_d(_1); } +// CHECK-LABEL: @xvclz_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvclz.b(<32 x i8> [[_112]]) +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvclz_b(v32i8 _1) { return __lasx_xvclz_b(_1); } +// CHECK-LABEL: @xvclz_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvclz.h(<16 x i16> [[_112]]) +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvclz_h(v16i16 _1) { return __lasx_xvclz_h(_1); } +// CHECK-LABEL: @xvclz_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvclz.w(<8 x i32> [[_112]]) +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvclz_w(v8i32 _1) { return __lasx_xvclz_w(_1); } +// CHECK-LABEL: @xvclz_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvclz.d(<4 x i64> [[_1]]) +// 
CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvclz_d(v4i64 _1) { return __lasx_xvclz_d(_1); } +// CHECK-LABEL: @xvfadd_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfadd.s(<8 x float> [[_1]], <8 x float> [[_2]]) +// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8f32 xvfadd_s(v8f32 _1, v8f32 _2) { return __lasx_xvfadd_s(_1, _2); } +// CHECK-LABEL: @xvfadd_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfadd.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4f64 xvfadd_d(v4f64 _1, v4f64 _2) { return __lasx_xvfadd_d(_1, _2); } +// CHECK-LABEL: @xvfsub_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfsub.s(<8 x float> [[_1]], <8 x float> [[_2]]) +// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8f32 xvfsub_s(v8f32 _1, v8f32 _2) { return __lasx_xvfsub_s(_1, _2); } +// CHECK-LABEL: @xvfsub_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfsub.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4f64 xvfsub_d(v4f64 _1, v4f64 _2) { return __lasx_xvfsub_d(_1, _2); } +// CHECK-LABEL: @xvfmul_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmul.s(<8 x float> [[_1]], <8 x float> [[_2]]) +// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8f32 xvfmul_s(v8f32 _1, v8f32 _2) { return __lasx_xvfmul_s(_1, _2); } +// CHECK-LABEL: @xvfmul_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmul.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4f64 xvfmul_d(v4f64 _1, v4f64 _2) { return __lasx_xvfmul_d(_1, _2); } +// CHECK-LABEL: @xvfdiv_s( +// CHECK-NEXT: entry: +// 
CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfdiv.s(<8 x float> [[_1]], <8 x float> [[_2]]) +// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8f32 xvfdiv_s(v8f32 _1, v8f32 _2) { return __lasx_xvfdiv_s(_1, _2); } +// CHECK-LABEL: @xvfdiv_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfdiv.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4f64 xvfdiv_d(v4f64 _1, v4f64 _2) { return __lasx_xvfdiv_d(_1, _2); } +// CHECK-LABEL: @xvfcvt_h_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvfcvt.h.s(<8 x float> [[_1]], <8 x float> [[_2]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvfcvt_h_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcvt_h_s(_1, _2); } +// CHECK-LABEL: @xvfcvt_s_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvt.s.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8f32 xvfcvt_s_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcvt_s_d(_1, _2); } +// CHECK-LABEL: @xvfmin_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmin.s(<8 x float> [[_1]], <8 x float> [[_2]]) +// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8f32 xvfmin_s(v8f32 _1, v8f32 _2) { return __lasx_xvfmin_s(_1, _2); } +// CHECK-LABEL: @xvfmin_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmin.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4f64 xvfmin_d(v4f64 _1, v4f64 _2) { return __lasx_xvfmin_d(_1, _2); } +// CHECK-LABEL: @xvfmina_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail 
call <8 x float> @llvm.loongarch.lasx.xvfmina.s(<8 x float> [[_1]], <8 x float> [[_2]]) +// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8f32 xvfmina_s(v8f32 _1, v8f32 _2) { return __lasx_xvfmina_s(_1, _2); } +// CHECK-LABEL: @xvfmina_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmina.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4f64 xvfmina_d(v4f64 _1, v4f64 _2) { return __lasx_xvfmina_d(_1, _2); } +// CHECK-LABEL: @xvfmax_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmax.s(<8 x float> [[_1]], <8 x float> [[_2]]) +// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8f32 xvfmax_s(v8f32 _1, v8f32 _2) { return __lasx_xvfmax_s(_1, _2); } +// CHECK-LABEL: @xvfmax_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmax.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4f64 xvfmax_d(v4f64 _1, v4f64 _2) { return __lasx_xvfmax_d(_1, _2); } +// CHECK-LABEL: @xvfmaxa_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmaxa.s(<8 x float> [[_1]], <8 x float> [[_2]]) +// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8f32 xvfmaxa_s(v8f32 _1, v8f32 _2) { return __lasx_xvfmaxa_s(_1, _2); } +// CHECK-LABEL: @xvfmaxa_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmaxa.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4f64 xvfmaxa_d(v4f64 _1, v4f64 _2) { return __lasx_xvfmaxa_d(_1, _2); } +// CHECK-LABEL: @xvfclass_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfclass.s(<8 x float> [[_1]]) +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvfclass_s(v8f32 _1) { return __lasx_xvfclass_s(_1); } +// CHECK-LABEL: @xvfclass_d( +// CHECK-NEXT: entry: 
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfclass.d(<4 x double> [[_1]]) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvfclass_d(v4f64 _1) { return __lasx_xvfclass_d(_1); } +// CHECK-LABEL: @xvfsqrt_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfsqrt.s(<8 x float> [[_1]]) +// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8f32 xvfsqrt_s(v8f32 _1) { return __lasx_xvfsqrt_s(_1); } +// CHECK-LABEL: @xvfsqrt_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfsqrt.d(<4 x double> [[_1]]) +// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4f64 xvfsqrt_d(v4f64 _1) { return __lasx_xvfsqrt_d(_1); } +// CHECK-LABEL: @xvfrecip_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrecip.s(<8 x float> [[_1]]) +// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8f32 xvfrecip_s(v8f32 _1) { return __lasx_xvfrecip_s(_1); } +// CHECK-LABEL: @xvfrecip_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrecip.d(<4 x double> [[_1]]) +// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4f64 xvfrecip_d(v4f64 _1) { return __lasx_xvfrecip_d(_1); } +// CHECK-LABEL: @xvfrint_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrint.s(<8 x float> [[_1]]) +// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8f32 xvfrint_s(v8f32 _1) { return __lasx_xvfrint_s(_1); } +// CHECK-LABEL: @xvfrint_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrint.d(<4 x double> [[_1]]) +// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4f64 xvfrint_d(v4f64 _1) { return __lasx_xvfrint_d(_1); } +// CHECK-LABEL: @xvfrsqrt_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrsqrt.s(<8 x float> [[_1]]) +// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8f32 xvfrsqrt_s(v8f32 _1) { return __lasx_xvfrsqrt_s(_1); } +// CHECK-LABEL: @xvfrsqrt_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa 
[[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrsqrt.d(<4 x double> [[_1]])
+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4f64 xvfrsqrt_d(v4f64 _1) { return __lasx_xvfrsqrt_d(_1); }
+// CHECK-LABEL: @xvflogb_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvflogb.s(<8 x float> [[_1]])
+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8f32 xvflogb_s(v8f32 _1) { return __lasx_xvflogb_s(_1); }
+// CHECK-LABEL: @xvflogb_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvflogb.d(<4 x double> [[_1]])
+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4f64 xvflogb_d(v4f64 _1) { return __lasx_xvflogb_d(_1); }
+// CHECK-LABEL: @xvfcvth_s_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvth.s.h(<16 x i16> [[_112]])
+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8f32 xvfcvth_s_h(v16i16 _1) { return __lasx_xvfcvth_s_h(_1); }
+// CHECK-LABEL: @xvfcvth_d_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfcvth.d.s(<8 x float> [[_1]])
+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4f64 xvfcvth_d_s(v8f32 _1) { return __lasx_xvfcvth_d_s(_1); }
+// CHECK-LABEL: @xvfcvtl_s_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvtl.s.h(<16 x i16> [[_112]])
+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8f32 xvfcvtl_s_h(v16i16 _1) { return __lasx_xvfcvtl_s_h(_1); }
+// CHECK-LABEL: @xvfcvtl_d_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfcvtl.d.s(<8 x float> [[_1]])
+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4f64 xvfcvtl_d_s(v8f32 _1) { return __lasx_xvfcvtl_d_s(_1); }
+// CHECK-LABEL: @xvftint_w_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.w.s(<8 x float> [[_1]])
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvftint_w_s(v8f32 _1) { return __lasx_xvftint_w_s(_1); }
+// CHECK-LABEL: @xvftint_l_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftint.l.d(<4 x double> [[_1]])
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvftint_l_d(v4f64 _1) { return __lasx_xvftint_l_d(_1); }
+// CHECK-LABEL: @xvftint_wu_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.wu.s(<8 x float> [[_1]])
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8u32 xvftint_wu_s(v8f32 _1) { return __lasx_xvftint_wu_s(_1); }
+// CHECK-LABEL: @xvftint_lu_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftint.lu.d(<4 x double> [[_1]])
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4u64 xvftint_lu_d(v4f64 _1) { return __lasx_xvftint_lu_d(_1); }
+// CHECK-LABEL: @xvftintrz_w_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.s(<8 x float> [[_1]])
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvftintrz_w_s(v8f32 _1) { return __lasx_xvftintrz_w_s(_1); }
+// CHECK-LABEL: @xvftintrz_l_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrz.l.d(<4 x double> [[_1]])
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvftintrz_l_d(v4f64 _1) { return __lasx_xvftintrz_l_d(_1); }
+// CHECK-LABEL: @xvftintrz_wu_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.wu.s(<8 x float> [[_1]])
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8u32 xvftintrz_wu_s(v8f32 _1) { return __lasx_xvftintrz_wu_s(_1); }
+// CHECK-LABEL: @xvftintrz_lu_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrz.lu.d(<4 x double> [[_1]])
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4u64 xvftintrz_lu_d(v4f64 _1) { return __lasx_xvftintrz_lu_d(_1); }
+// CHECK-LABEL: @xvffint_s_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.w(<8 x i32> [[_112]])
+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8f32 xvffint_s_w(v8i32 _1) { return __lasx_xvffint_s_w(_1); }
+// CHECK-LABEL: @xvffint_d_l(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffint.d.l(<4 x i64> [[_1]])
+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4f64 xvffint_d_l(v4i64 _1) { return __lasx_xvffint_d_l(_1); }
+// CHECK-LABEL: @xvffint_s_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.wu(<8 x i32> [[_112]])
+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8f32 xvffint_s_wu(v8u32 _1) { return __lasx_xvffint_s_wu(_1); }
+// CHECK-LABEL: @xvffint_d_lu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffint.d.lu(<4 x i64> [[_1]])
+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4f64 xvffint_d_lu(v4u64 _1) { return __lasx_xvffint_d_lu(_1); }
+// CHECK-LABEL: @xvreplve_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve.b(<32 x i8> [[_112]], i32 [[_2:%.*]])
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvreplve_b(v32i8 _1, int _2) { return __lasx_xvreplve_b(_1, _2); }
+// CHECK-LABEL: @xvreplve_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplve.h(<16 x i16> [[_112]], i32 [[_2:%.*]])
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvreplve_h(v16i16 _1, int _2) { return __lasx_xvreplve_h(_1, _2); }
+// CHECK-LABEL: @xvreplve_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplve.w(<8 x i32> [[_112]], i32 [[_2:%.*]])
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvreplve_w(v8i32 _1, int _2) { return __lasx_xvreplve_w(_1, _2); }
+// CHECK-LABEL: @xvreplve_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplve.d(<4 x i64> [[_1]], i32 [[_2:%.*]])
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvreplve_d(v4i64 _1, int _2) { return __lasx_xvreplve_d(_1, _2); }
+// CHECK-LABEL: @xvpermi_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpermi.w(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1)
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvpermi_w(v8i32 _1, v8i32 _2) { return __lasx_xvpermi_w(_1, _2, 1); }
+// CHECK-LABEL: @xvandn_v(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvandn.v(<32 x i8> [[_124]], <32 x i8> [[_235]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32u8 xvandn_v(v32u8 _1, v32u8 _2) { return __lasx_xvandn_v(_1, _2); }
+// CHECK-LABEL: @xvneg_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvneg.b(<32 x i8> [[_112]])
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvneg_b(v32i8 _1) { return __lasx_xvneg_b(_1); }
+// CHECK-LABEL: @xvneg_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvneg.h(<16 x i16> [[_112]])
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvneg_h(v16i16 _1) { return __lasx_xvneg_h(_1); }
+// CHECK-LABEL: @xvneg_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvneg.w(<8 x i32> [[_112]])
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvneg_w(v8i32 _1) { return __lasx_xvneg_w(_1); }
+// CHECK-LABEL: @xvneg_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvneg.d(<4 x i64> [[_1]])
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvneg_d(v4i64 _1) { return __lasx_xvneg_d(_1); }
+// CHECK-LABEL: @xvmuh_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmuh.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvmuh_b(v32i8 _1, v32i8 _2) { return __lasx_xvmuh_b(_1, _2); }
+// CHECK-LABEL: @xvmuh_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmuh.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvmuh_h(v16i16 _1, v16i16 _2) { return __lasx_xvmuh_h(_1, _2); }
+// CHECK-LABEL: @xvmuh_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmuh.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvmuh_w(v8i32 _1, v8i32 _2) { return __lasx_xvmuh_w(_1, _2); }
+// CHECK-LABEL: @xvmuh_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmuh.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvmuh_d(v4i64 _1, v4i64 _2) { return __lasx_xvmuh_d(_1, _2); }
+// CHECK-LABEL: @xvmuh_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmuh.bu(<32 x i8> [[_124]], <32 x i8> [[_235]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32u8 xvmuh_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmuh_bu(_1, _2); }
+// CHECK-LABEL: @xvmuh_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmuh.hu(<16 x i16> [[_124]], <16 x i16> [[_235]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16u16 xvmuh_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmuh_hu(_1, _2); }
+// CHECK-LABEL: @xvmuh_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmuh.wu(<8 x i32> [[_124]], <8 x i32> [[_235]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8u32 xvmuh_wu(v8u32 _1, v8u32 _2) { return __lasx_xvmuh_wu(_1, _2); }
+// CHECK-LABEL: @xvmuh_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmuh.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4u64 xvmuh_du(v4u64 _1, v4u64 _2) { return __lasx_xvmuh_du(_1, _2); }
+// CHECK-LABEL: @xvsllwil_h_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsllwil.h.b(<32 x i8> [[_1]], i32 1)
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvsllwil_h_b(v32i8 _1) { return __lasx_xvsllwil_h_b(_1, 1); }
+// CHECK-LABEL: @xvsllwil_w_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsllwil.w.h(<16 x i16> [[_1]], i32 1)
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvsllwil_w_h(v16i16 _1) { return __lasx_xvsllwil_w_h(_1, 1); }
+// CHECK-LABEL: @xvsllwil_d_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsllwil.d.w(<8 x i32> [[_1]], i32 1)
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvsllwil_d_w(v8i32 _1) { return __lasx_xvsllwil_d_w(_1, 1); }
+// CHECK-LABEL: @xvsllwil_hu_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsllwil.hu.bu(<32 x i8> [[_1]], i32 1)
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16u16 xvsllwil_hu_bu(v32u8 _1) { return __lasx_xvsllwil_hu_bu(_1, 1); }
+// CHECK-LABEL: @xvsllwil_wu_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsllwil.wu.hu(<16 x i16> [[_1]], i32 1)
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8u32 xvsllwil_wu_hu(v16u16 _1) { return __lasx_xvsllwil_wu_hu(_1, 1); }
+// CHECK-LABEL: @xvsllwil_du_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsllwil.du.wu(<8 x i32> [[_1]], i32 1)
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4u64 xvsllwil_du_wu(v8u32 _1) { return __lasx_xvsllwil_du_wu(_1, 1); }
+// CHECK-LABEL: @xvsran_b_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsran.b.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvsran_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvsran_b_h(_1, _2); }
+// CHECK-LABEL: @xvsran_h_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsran.h.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvsran_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvsran_h_w(_1, _2); }
+// CHECK-LABEL: @xvsran_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsran.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvsran_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvsran_w_d(_1, _2); }
+// CHECK-LABEL: @xvssran_b_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssran.b.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvssran_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvssran_b_h(_1, _2); }
+// CHECK-LABEL: @xvssran_h_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssran.h.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvssran_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvssran_h_w(_1, _2); }
+// CHECK-LABEL: @xvssran_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssran.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvssran_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvssran_w_d(_1, _2); }
+// CHECK-LABEL: @xvssran_bu_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssran.bu.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32u8 xvssran_bu_h(v16u16 _1, v16u16 _2) { return __lasx_xvssran_bu_h(_1, _2); }
+// CHECK-LABEL: @xvssran_hu_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssran.hu.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16u16 xvssran_hu_w(v8u32 _1, v8u32 _2) { return __lasx_xvssran_hu_w(_1, _2); }
+// CHECK-LABEL: @xvssran_wu_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssran.wu.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8u32 xvssran_wu_d(v4u64 _1, v4u64 _2) { return __lasx_xvssran_wu_d(_1, _2); }
+// CHECK-LABEL: @xvsrarn_b_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrarn.b.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvsrarn_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrarn_b_h(_1, _2); }
+// CHECK-LABEL: @xvsrarn_h_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrarn.h.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvsrarn_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrarn_h_w(_1, _2); }
+// CHECK-LABEL: @xvsrarn_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrarn.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvsrarn_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrarn_w_d(_1, _2); }
+// CHECK-LABEL: @xvssrarn_b_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarn.b.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvssrarn_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvssrarn_b_h(_1, _2); }
+// CHECK-LABEL: @xvssrarn_h_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarn.h.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvssrarn_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvssrarn_h_w(_1, _2); }
+// CHECK-LABEL: @xvssrarn_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarn.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvssrarn_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvssrarn_w_d(_1, _2); }
+// CHECK-LABEL: @xvssrarn_bu_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarn.bu.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32u8 xvssrarn_bu_h(v16u16 _1, v16u16 _2) { return __lasx_xvssrarn_bu_h(_1, _2); }
+// CHECK-LABEL: @xvssrarn_hu_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarn.hu.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16u16 xvssrarn_hu_w(v8u32 _1, v8u32 _2) { return __lasx_xvssrarn_hu_w(_1, _2); }
+// CHECK-LABEL: @xvssrarn_wu_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarn.wu.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8u32 xvssrarn_wu_d(v4u64 _1, v4u64 _2) { return __lasx_xvssrarn_wu_d(_1, _2); }
+// CHECK-LABEL: @xvsrln_b_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrln.b.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvsrln_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrln_b_h(_1, _2); }
+// CHECK-LABEL: @xvsrln_h_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrln.h.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvsrln_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrln_h_w(_1, _2); }
+// CHECK-LABEL: @xvsrln_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrln.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvsrln_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrln_w_d(_1, _2); }
+// CHECK-LABEL: @xvssrln_bu_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrln.bu.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32u8 xvssrln_bu_h(v16u16 _1, v16u16 _2) { return __lasx_xvssrln_bu_h(_1, _2); }
+// CHECK-LABEL: @xvssrln_hu_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrln.hu.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16u16 xvssrln_hu_w(v8u32 _1, v8u32 _2) { return __lasx_xvssrln_hu_w(_1, _2); }
+// CHECK-LABEL: @xvssrln_wu_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrln.wu.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8u32 xvssrln_wu_d(v4u64 _1, v4u64 _2) { return __lasx_xvssrln_wu_d(_1, _2); }
+// CHECK-LABEL: @xvsrlrn_b_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlrn.b.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvsrlrn_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrlrn_b_h(_1, _2); }
+// CHECK-LABEL: @xvsrlrn_h_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlrn.h.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvsrlrn_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrlrn_h_w(_1, _2); }
+// CHECK-LABEL: @xvsrlrn_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlrn.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvsrlrn_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrlrn_w_d(_1, _2); }
+// CHECK-LABEL: @xvssrlrn_bu_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.bu.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32u8 xvssrlrn_bu_h(v16u16 _1, v16u16 _2) { return __lasx_xvssrlrn_bu_h(_1, _2); }
+// CHECK-LABEL: @xvssrlrn_hu_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.hu.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16u16 xvssrlrn_hu_w(v8u32 _1, v8u32 _2) { return __lasx_xvssrlrn_hu_w(_1, _2); }
+// CHECK-LABEL: @xvssrlrn_wu_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.wu.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8u32 xvssrlrn_wu_d(v4u64 _1, v4u64 _2) { return __lasx_xvssrlrn_wu_d(_1, _2); }
+// CHECK-LABEL: @xvfrstpi_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvfrstpi.b(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1)
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvfrstpi_b(v32i8 _1, v32i8 _2) { return __lasx_xvfrstpi_b(_1, _2, 1); }
+// CHECK-LABEL: @xvfrstpi_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvfrstpi.h(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1)
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvfrstpi_h(v16i16 _1, v16i16 _2) { return __lasx_xvfrstpi_h(_1, _2, 1); }
+// CHECK-LABEL: @xvfrstp_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_136:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvfrstp.b(<32 x i8> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]])
+// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvfrstp_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __lasx_xvfrstp_b(_1, _2, _3); }
+// CHECK-LABEL: @xvfrstp_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvfrstp.h(<16 x i16> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]])
+// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvfrstp_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __lasx_xvfrstp_h(_1, _2, _3); }
+// CHECK-LABEL: @xvshuf4i_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvshuf4i.d(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1)
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvshuf4i_d(v4i64 _1, v4i64 _2) { return __lasx_xvshuf4i_d(_1, _2, 1); }
+// CHECK-LABEL: @xvbsrl_v(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbsrl.v(<32 x i8> [[_1]], i32 1)
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvbsrl_v(v32i8 _1) { return __lasx_xvbsrl_v(_1, 1); }
+// CHECK-LABEL: @xvbsll_v(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbsll.v(<32 x i8> [[_1]], i32 1)
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvbsll_v(v32i8 _1) { return __lasx_xvbsll_v(_1, 1); }
+// CHECK-LABEL: @xvextrins_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvextrins.b(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1)
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvextrins_b(v32i8 _1, v32i8 _2) { return __lasx_xvextrins_b(_1, _2, 1); }
+// CHECK-LABEL: @xvextrins_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvextrins.h(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1)
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvextrins_h(v16i16 _1, v16i16 _2) { return __lasx_xvextrins_h(_1, _2, 1); }
+// CHECK-LABEL: @xvextrins_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvextrins.w(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1)
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvextrins_w(v8i32 _1, v8i32 _2) { return __lasx_xvextrins_w(_1, _2, 1); }
+// CHECK-LABEL: @xvextrins_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextrins.d(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1)
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvextrins_d(v4i64 _1, v4i64 _2) { return __lasx_xvextrins_d(_1, _2, 1); }
+// CHECK-LABEL: @xvmskltz_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmskltz.b(<32 x i8> [[_112]])
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvmskltz_b(v32i8 _1) { return __lasx_xvmskltz_b(_1); }
+// CHECK-LABEL: @xvmskltz_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmskltz.h(<16 x i16> [[_112]])
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvmskltz_h(v16i16 _1) { return __lasx_xvmskltz_h(_1); }
+// CHECK-LABEL: @xvmskltz_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmskltz.w(<8 x i32> [[_112]])
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvmskltz_w(v8i32 _1) { return __lasx_xvmskltz_w(_1); }
+// CHECK-LABEL: @xvmskltz_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmskltz.d(<4 x i64> [[_1]])
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvmskltz_d(v4i64 _1) { return __lasx_xvmskltz_d(_1); }
+// CHECK-LABEL: @xvsigncov_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsigncov.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvsigncov_b(v32i8 _1, v32i8 _2) { return __lasx_xvsigncov_b(_1, _2); }
+// CHECK-LABEL: @xvsigncov_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsigncov.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvsigncov_h(v16i16 _1, v16i16 _2) { return __lasx_xvsigncov_h(_1, _2); }
+// CHECK-LABEL: @xvsigncov_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsigncov.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvsigncov_w(v8i32 _1, v8i32 _2) { return __lasx_xvsigncov_w(_1, _2); }
+// CHECK-LABEL: @xvsigncov_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsigncov.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvsigncov_d(v4i64 _1, v4i64 _2) { return __lasx_xvsigncov_d(_1, _2); }
+// CHECK-LABEL: @xvfmadd_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <8 x float>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmadd.s(<8 x float> [[_1]], <8 x float> [[_2]], <8 x float> [[_3]])
+// CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8f32 xvfmadd_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __lasx_xvfmadd_s(_1, _2, _3); }
+// CHECK-LABEL: @xvfmadd_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmadd.d(<4 x double> [[_1]], <4 x double> [[_2]], <4 x double> [[_3]])
+// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4f64 xvfmadd_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __lasx_xvfmadd_d(_1, _2, _3); }
+// CHECK-LABEL: @xvfmsub_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <8 x float>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmsub.s(<8 x float> [[_1]], <8 x float> [[_2]], <8 x float> [[_3]])
+// CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8f32 xvfmsub_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __lasx_xvfmsub_s(_1, _2, _3); }
+// CHECK-LABEL: @xvfmsub_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmsub.d(<4 x double> [[_1]], <4 x double> [[_2]], <4 x double> [[_3]])
+// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4f64 xvfmsub_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __lasx_xvfmsub_d(_1, _2, _3); }
+// CHECK-LABEL: @xvfnmadd_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <8 x float>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfnmadd.s(<8 x float> [[_1]], <8 x float> [[_2]], <8 x float> [[_3]])
+// CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8f32 xvfnmadd_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __lasx_xvfnmadd_s(_1, _2, _3); }
+// CHECK-LABEL: @xvfnmadd_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfnmadd.d(<4 x double> [[_1]], <4 x double> [[_2]], <4 x double> [[_3]])
+// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4f64 xvfnmadd_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __lasx_xvfnmadd_d(_1, _2, _3); }
+// CHECK-LABEL: @xvfnmsub_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <8 x float>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfnmsub.s(<8 x float> [[_1]], <8 x float> [[_2]], <8 x float> [[_3]])
+// CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8f32 xvfnmsub_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __lasx_xvfnmsub_s(_1, _2, _3); }
+// CHECK-LABEL: @xvfnmsub_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfnmsub.d(<4 x double> [[_1]], <4 x double> [[_2]], <4 x double> [[_3]])
+// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4f64 xvfnmsub_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __lasx_xvfnmsub_d(_1, _2, _3); }
+// CHECK-LABEL: @xvftintrne_w_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.s(<8 x float> [[_1]])
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvftintrne_w_s(v8f32 _1) { return __lasx_xvftintrne_w_s(_1); }
+// CHECK-LABEL: @xvftintrne_l_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrne.l.d(<4 x double> [[_1]])
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvftintrne_l_d(v4f64 _1) { return __lasx_xvftintrne_l_d(_1); }
+// CHECK-LABEL: @xvftintrp_w_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.s(<8 x float> [[_1]])
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvftintrp_w_s(v8f32 _1) { return __lasx_xvftintrp_w_s(_1); }
+// CHECK-LABEL: @xvftintrp_l_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrp.l.d(<4 x double> [[_1]])
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvftintrp_l_d(v4f64 _1) { return __lasx_xvftintrp_l_d(_1); }
+// CHECK-LABEL: @xvftintrm_w_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.s(<8 x float> [[_1]])
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvftintrm_w_s(v8f32 _1) { return __lasx_xvftintrm_w_s(_1); }
+// CHECK-LABEL: @xvftintrm_l_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrm.l.d(<4 x double> [[_1]])
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvftintrm_l_d(v4f64 _1) { return __lasx_xvftintrm_l_d(_1); }
+// CHECK-LABEL: @xvftint_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.w.d(<4 x double> [[_1]], <4 x double> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvftint_w_d(v4f64 _1, v4f64 _2) { return __lasx_xvftint_w_d(_1, _2); }
+// CHECK-LABEL: @xvffint_s_l(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.l(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8f32 xvffint_s_l(v4i64 _1, v4i64 _2) { return __lasx_xvffint_s_l(_1, _2); }
+// CHECK-LABEL: @xvftintrz_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.d(<4 x double> [[_1]], <4 x double> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvftintrz_w_d(v4f64 _1, v4f64 _2) { return __lasx_xvftintrz_w_d(_1, _2); }
+// CHECK-LABEL: @xvftintrp_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.d(<4 x double> [[_1]], <4 x double> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvftintrp_w_d(v4f64 _1, v4f64 _2) { return __lasx_xvftintrp_w_d(_1, _2); }
+// CHECK-LABEL: @xvftintrm_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.d(<4 x double> [[_1]], <4 x double> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvftintrm_w_d(v4f64 _1, v4f64 _2) { return __lasx_xvftintrm_w_d(_1, _2); }
+// CHECK-LABEL: @xvftintrne_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.d(<4 x double> [[_1]], <4 x double> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvftintrne_w_d(v4f64 _1, v4f64 _2) { return __lasx_xvftintrne_w_d(_1, _2); }
+// CHECK-LABEL: @xvftinth_l_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftinth.l.s(<8 x float> [[_1]])
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvftinth_l_s(v8f32 _1) { return __lasx_xvftinth_l_s(_1); }
+// CHECK-LABEL: @xvftintl_l_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintl.l.s(<8 x float> [[_1]])
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvftintl_l_s(v8f32 _1) { return __lasx_xvftintl_l_s(_1); }
+// CHECK-LABEL: @xvffinth_d_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffinth.d.w(<8 x i32> [[_112]])
+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4f64 xvffinth_d_w(v8i32 _1) { return __lasx_xvffinth_d_w(_1); }
+// CHECK-LABEL: @xvffintl_d_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffintl.d.w(<8 x i32> [[_112]])
+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4f64 xvffintl_d_w(v8i32 _1) { return __lasx_xvffintl_d_w(_1); }
+// CHECK-LABEL: @xvftintrzh_l_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrzh.l.s(<8 x float> [[_1]])
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvftintrzh_l_s(v8f32 _1) { return __lasx_xvftintrzh_l_s(_1); }
+// CHECK-LABEL: @xvftintrzl_l_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrzl.l.s(<8 x float> [[_1]])
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvftintrzl_l_s(v8f32 _1) { return __lasx_xvftintrzl_l_s(_1); }
+// CHECK-LABEL: @xvftintrph_l_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrph.l.s(<8 x float> [[_1]])
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvftintrph_l_s(v8f32 _1) { return __lasx_xvftintrph_l_s(_1); }
+// CHECK-LABEL: @xvftintrpl_l_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrpl.l.s(<8 x float> [[_1]])
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvftintrpl_l_s(v8f32 _1) { return __lasx_xvftintrpl_l_s(_1); }
+// CHECK-LABEL: @xvftintrmh_l_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrmh.l.s(<8 x float> [[_1]])
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvftintrmh_l_s(v8f32 _1) { return __lasx_xvftintrmh_l_s(_1); }
+// CHECK-LABEL: @xvftintrml_l_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrml.l.s(<8 x float> [[_1]])
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvftintrml_l_s(v8f32 _1) { return __lasx_xvftintrml_l_s(_1); }
+// CHECK-LABEL: @xvftintrneh_l_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrneh.l.s(<8 x float> [[_1]])
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvftintrneh_l_s(v8f32 _1) { return __lasx_xvftintrneh_l_s(_1); }
+// CHECK-LABEL: @xvftintrnel_l_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrnel.l.s(<8 x float> [[_1]])
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvftintrnel_l_s(v8f32 _1) { return __lasx_xvftintrnel_l_s(_1); }
+// CHECK-LABEL: @xvfrintrne_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrne.s(<8 x float> [[_1]])
+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvfrintrne_s(v8f32 _1) { return __lasx_xvfrintrne_s(_1); }
+// CHECK-LABEL: @xvfrintrne_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrne.d(<4 x double> [[_1]])
+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvfrintrne_d(v4f64 _1) { return __lasx_xvfrintrne_d(_1); }
+// CHECK-LABEL: @xvfrintrz_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrz.s(<8 x float> [[_1]])
+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvfrintrz_s(v8f32 _1) { return __lasx_xvfrintrz_s(_1); }
+// CHECK-LABEL: @xvfrintrz_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrz.d(<4 x double> [[_1]])
+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvfrintrz_d(v4f64 _1) { return __lasx_xvfrintrz_d(_1); }
+// CHECK-LABEL: @xvfrintrp_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrp.s(<8 x float> [[_1]])
+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvfrintrp_s(v8f32 _1) { return __lasx_xvfrintrp_s(_1); }
+// CHECK-LABEL: @xvfrintrp_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrp.d(<4 x double> [[_1]])
+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvfrintrp_d(v4f64 _1) { return __lasx_xvfrintrp_d(_1); }
+// CHECK-LABEL: @xvfrintrm_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrm.s(<8 x float> [[_1]])
+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvfrintrm_s(v8f32 _1) { return __lasx_xvfrintrm_s(_1); }
+// CHECK-LABEL: @xvfrintrm_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrm.d(<4 x double> [[_1]])
+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvfrintrm_d(v4f64 _1) { return __lasx_xvfrintrm_d(_1); }
+// CHECK-LABEL: @xvld(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvld(ptr [[_1:%.*]], i32 1)
+// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvld(void * _1) { return __lasx_xvld(_1, 1); }
+// CHECK-LABEL: @xvst(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvst(<32 x i8> [[_1]], ptr [[_2:%.*]], i32 1)
+// CHECK-NEXT: ret void
+//
+void xvst(v32i8 _1, void * _2) { return __lasx_xvst(_1, _2, 1); }
+// CHECK-LABEL: @xvstelm_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.b(<32 x i8> [[_1]], ptr [[_2:%.*]], i32 1, i32 1)
+// CHECK-NEXT: ret void
+//
+void xvstelm_b(v32i8 _1, void * _2) { return __lasx_xvstelm_b(_1, _2, 1, 1); }
+// CHECK-LABEL: @xvstelm_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.h(<16 x i16> [[_1]], ptr [[_2:%.*]], i32 2, i32 1)
+// CHECK-NEXT: ret void
+//
+void xvstelm_h(v16i16 _1, void * _2) { return __lasx_xvstelm_h(_1, _2, 2, 1); }
+// CHECK-LABEL: @xvstelm_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.w(<8 x i32> [[_1]], ptr [[_2:%.*]], i32 4, i32 1)
+// CHECK-NEXT: ret void
+//
+void xvstelm_w(v8i32 _1, void * _2) { return __lasx_xvstelm_w(_1, _2, 4, 1); }
+// CHECK-LABEL: @xvstelm_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.d(<4 x i64> [[_1]], ptr [[_2:%.*]], i32 8, i32 1)
+// CHECK-NEXT: ret void
+//
+void xvstelm_d(v4i64 _1, void * _2) { return __lasx_xvstelm_d(_1, _2, 8, 1); }
+// CHECK-LABEL: @xvinsve0_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvinsve0.w(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1)
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvinsve0_w(v8i32 _1, v8i32 _2) { return __lasx_xvinsve0_w(_1, _2, 1); }
+// CHECK-LABEL: @xvinsve0_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvinsve0.d(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1)
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvinsve0_d(v4i64 _1, v4i64 _2) { return __lasx_xvinsve0_d(_1, _2, 1); }
+// CHECK-LABEL: @xvpickve_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickve.w(<8 x i32> [[_1]], i32 1)
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvpickve_w(v8i32 _1) { return __lasx_xvpickve_w(_1, 1); }
+// CHECK-LABEL: @xvpickve_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr
[[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickve.d(<4 x i64> [[_1]], i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvpickve_d(v4i64 _1) { return __lasx_xvpickve_d(_1, 1); } +// CHECK-LABEL: @xvssrlrn_b_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.b.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvssrlrn_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvssrlrn_b_h(_1, _2); } +// CHECK-LABEL: @xvssrlrn_h_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.h.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvssrlrn_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvssrlrn_h_w(_1, _2); } +// CHECK-LABEL: @xvssrlrn_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvssrlrn_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvssrlrn_w_d(_1, _2); } +// CHECK-LABEL: @xvssrln_b_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrln.b.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvssrln_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvssrln_b_h(_1, _2); } +// CHECK-LABEL: @xvssrln_h_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrln.h.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvssrln_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvssrln_h_w(_1, _2); } +// CHECK-LABEL: @xvssrln_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrln.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr 
[[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvssrln_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvssrln_w_d(_1, _2); } +// CHECK-LABEL: @xvorn_v( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvorn.v(<32 x i8> [[_124]], <32 x i8> [[_235]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvorn_v(v32i8 _1, v32i8 _2) { return __lasx_xvorn_v(_1, _2); } +// CHECK-LABEL: @xvldi( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvldi(i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvldi() { return __lasx_xvldi(1); } +// CHECK-LABEL: @xvldx( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvldx(ptr [[_1:%.*]], i64 1), !noalias [[META5:![0-9]+]] +// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvldx(void * _1) { return __lasx_xvldx(_1, 1); } +// CHECK-LABEL: @xvstx( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstx(<32 x i8> [[_112]], ptr [[_2:%.*]], i64 1) +// CHECK-NEXT: ret void +// +void xvstx(v32i8 _1, void * _2) { return __lasx_xvstx(_1, _2, 1); } +// CHECK-LABEL: @xvextl_qu_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextl.qu.du(<4 x i64> [[_1]]) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4u64 xvextl_qu_du(v4u64 _1) { return __lasx_xvextl_qu_du(_1); } +// CHECK-LABEL: @xvinsgr2vr_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvinsgr2vr.w(<8 x i32> [[_1]], i32 1, i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvinsgr2vr_w(v8i32 _1) { return __lasx_xvinsgr2vr_w(_1, 1, 1); } +// CHECK-LABEL: @xvinsgr2vr_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvinsgr2vr.d(<4 x i64> [[_1]], i64 1, i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvinsgr2vr_d(v4i64 _1) { return __lasx_xvinsgr2vr_d(_1, 1, 1); } +// CHECK-LABEL: @xvreplve0_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve0.b(<32 x i8> [[_112]]) +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvreplve0_b(v32i8 _1) { return __lasx_xvreplve0_b(_1); } +// CHECK-LABEL: @xvreplve0_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_112:%.*]] = load <16 
x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplve0.h(<16 x i16> [[_112]]) +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvreplve0_h(v16i16 _1) { return __lasx_xvreplve0_h(_1); } +// CHECK-LABEL: @xvreplve0_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplve0.w(<8 x i32> [[_112]]) +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvreplve0_w(v8i32 _1) { return __lasx_xvreplve0_w(_1); } +// CHECK-LABEL: @xvreplve0_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplve0.d(<4 x i64> [[_1]]) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvreplve0_d(v4i64 _1) { return __lasx_xvreplve0_d(_1); } +// CHECK-LABEL: @xvreplve0_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve0.q(<32 x i8> [[_112]]) +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvreplve0_q(v32i8 _1) { return __lasx_xvreplve0_q(_1); } +// CHECK-LABEL: @vext2xv_h_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.vext2xv.h.b(<32 x i8> [[_112]]) +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 vext2xv_h_b(v32i8 _1) { return __lasx_vext2xv_h_b(_1); } +// CHECK-LABEL: @vext2xv_w_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.h(<16 x i16> [[_112]]) +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 vext2xv_w_h(v16i16 _1) { return __lasx_vext2xv_w_h(_1); } +// CHECK-LABEL: @vext2xv_d_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.w(<8 x i32> [[_112]]) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 vext2xv_d_w(v8i32 _1) { return __lasx_vext2xv_d_w(_1); } +// CHECK-LABEL: @vext2xv_w_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.b(<32 x i8> [[_112]]) +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 vext2xv_w_b(v32i8 _1) { return __lasx_vext2xv_w_b(_1); } +// CHECK-LABEL: @vext2xv_d_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: 
[[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.h(<16 x i16> [[_112]]) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 vext2xv_d_h(v16i16 _1) { return __lasx_vext2xv_d_h(_1); } +// CHECK-LABEL: @vext2xv_d_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.b(<32 x i8> [[_112]]) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 vext2xv_d_b(v32i8 _1) { return __lasx_vext2xv_d_b(_1); } +// CHECK-LABEL: @vext2xv_hu_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.vext2xv.hu.bu(<32 x i8> [[_112]]) +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 vext2xv_hu_bu(v32i8 _1) { return __lasx_vext2xv_hu_bu(_1); } +// CHECK-LABEL: @vext2xv_wu_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.hu(<16 x i16> [[_112]]) +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 vext2xv_wu_hu(v16i16 _1) { return __lasx_vext2xv_wu_hu(_1); } +// CHECK-LABEL: @vext2xv_du_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.wu(<8 x i32> [[_112]]) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 vext2xv_du_wu(v8i32 _1) { return __lasx_vext2xv_du_wu(_1); } +// CHECK-LABEL: @vext2xv_wu_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.bu(<32 x i8> [[_112]]) +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 vext2xv_wu_bu(v32i8 _1) { return __lasx_vext2xv_wu_bu(_1); } +// CHECK-LABEL: @vext2xv_du_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.hu(<16 x i16> [[_112]]) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 vext2xv_du_hu(v16i16 _1) { return __lasx_vext2xv_du_hu(_1); } +// CHECK-LABEL: @vext2xv_du_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.bu(<32 x i8> [[_112]]) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 vext2xv_du_bu(v32i8 _1) { return __lasx_vext2xv_du_bu(_1); } +// CHECK-LABEL: @xvpermi_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x 
i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpermi.q(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvpermi_q(v32i8 _1, v32i8 _2) { return __lasx_xvpermi_q(_1, _2, 1); } +// CHECK-LABEL: @xvpermi_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpermi.d(<4 x i64> [[_1]], i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvpermi_d(v4i64 _1) { return __lasx_xvpermi_d(_1, 1); } +// CHECK-LABEL: @xvperm_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvperm.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvperm_w(v8i32 _1, v8i32 _2) { return __lasx_xvperm_w(_1, _2); } +// CHECK-LABEL: @xvldrepl_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvldrepl.b(ptr [[_1:%.*]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvldrepl_b(void * _1) { return __lasx_xvldrepl_b(_1, 1); } +// CHECK-LABEL: @xvldrepl_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvldrepl.h(ptr [[_1:%.*]], i32 2) +// CHECK-NEXT: store <16 x i16> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvldrepl_h(void * _1) { return __lasx_xvldrepl_h(_1, 2); } +// CHECK-LABEL: @xvldrepl_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvldrepl.w(ptr [[_1:%.*]], i32 4) +// CHECK-NEXT: store <8 x i32> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvldrepl_w(void * _1) { return __lasx_xvldrepl_w(_1, 4); } +// CHECK-LABEL: @xvldrepl_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvldrepl.d(ptr [[_1:%.*]], i32 8) +// CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvldrepl_d(void * _1) { return __lasx_xvldrepl_d(_1, 8); } +// CHECK-LABEL: @xvpickve2gr_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xvpickve2gr.w(<8 x i32> [[_1]], i32 1) +// CHECK-NEXT: ret i32 [[TMP1]] +// +int xvpickve2gr_w(v8i32 _1) { return __lasx_xvpickve2gr_w(_1, 1); } +// CHECK-LABEL: @xvpickve2gr_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xvpickve2gr.wu(<8 x i32> [[_1]], i32 1) +// CHECK-NEXT: ret i32 [[TMP1]] +// +unsigned int xvpickve2gr_wu(v8i32 _1) { return __lasx_xvpickve2gr_wu(_1, 1); } +// CHECK-LABEL: @xvpickve2gr_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load 
<4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.loongarch.lasx.xvpickve2gr.d(<4 x i64> [[_1]], i32 1) +// CHECK-NEXT: ret i64 [[TMP1]] +// +long xvpickve2gr_d(v4i64 _1) { return __lasx_xvpickve2gr_d(_1, 1); } +// CHECK-LABEL: @xvpickve2gr_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.loongarch.lasx.xvpickve2gr.du(<4 x i64> [[_1]], i32 1) +// CHECK-NEXT: ret i64 [[TMP1]] +// +unsigned long int xvpickve2gr_du(v4i64 _1) { return __lasx_xvpickve2gr_du(_1, 1); } +// CHECK-LABEL: @xvaddwev_q_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvaddwev_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvaddwev_q_d(_1, _2); } +// CHECK-LABEL: @xvaddwev_d_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvaddwev_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvaddwev_d_w(_1, _2); } +// CHECK-LABEL: @xvaddwev_w_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvaddwev_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvaddwev_w_h(_1, _2); } +// CHECK-LABEL: @xvaddwev_h_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvaddwev_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvaddwev_h_b(_1, _2); } +// CHECK-LABEL: @xvaddwev_q_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvaddwev_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvaddwev_q_du(_1, _2); } +// CHECK-LABEL: @xvaddwev_d_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <8 x 
i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvaddwev_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvaddwev_d_wu(_1, _2); } +// CHECK-LABEL: @xvaddwev_w_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvaddwev_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvaddwev_w_hu(_1, _2); } +// CHECK-LABEL: @xvaddwev_h_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvaddwev_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvaddwev_h_bu(_1, _2); } +// CHECK-LABEL: @xvsubwev_q_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvsubwev_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvsubwev_q_d(_1, _2); } +// CHECK-LABEL: @xvsubwev_d_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvsubwev_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvsubwev_d_w(_1, _2); } +// CHECK-LABEL: @xvsubwev_w_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvsubwev_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvsubwev_w_h(_1, _2); } +// CHECK-LABEL: @xvsubwev_h_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: 
[[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvsubwev_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvsubwev_h_b(_1, _2); } +// CHECK-LABEL: @xvsubwev_q_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvsubwev_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvsubwev_q_du(_1, _2); } +// CHECK-LABEL: @xvsubwev_d_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvsubwev_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvsubwev_d_wu(_1, _2); } +// CHECK-LABEL: @xvsubwev_w_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvsubwev_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvsubwev_w_hu(_1, _2); } +// CHECK-LABEL: @xvsubwev_h_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvsubwev_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvsubwev_h_bu(_1, _2); } +// CHECK-LABEL: @xvmulwev_q_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvmulwev_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvmulwev_q_d(_1, _2); } +// CHECK-LABEL: @xvmulwev_d_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr 
[[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvmulwev_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvmulwev_d_w(_1, _2); } +// CHECK-LABEL: @xvmulwev_w_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvmulwev_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvmulwev_w_h(_1, _2); } +// CHECK-LABEL: @xvmulwev_h_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvmulwev_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvmulwev_h_b(_1, _2); } +// CHECK-LABEL: @xvmulwev_q_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvmulwev_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvmulwev_q_du(_1, _2); } +// CHECK-LABEL: @xvmulwev_d_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvmulwev_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvmulwev_d_wu(_1, _2); } +// CHECK-LABEL: @xvmulwev_w_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvmulwev_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmulwev_w_hu(_1, _2); } +// CHECK-LABEL: @xvmulwev_h_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvmulwev_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmulwev_h_bu(_1, 
_2); } +// CHECK-LABEL: @xvaddwod_q_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvaddwod_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvaddwod_q_d(_1, _2); } +// CHECK-LABEL: @xvaddwod_d_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvaddwod_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvaddwod_d_w(_1, _2); } +// CHECK-LABEL: @xvaddwod_w_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvaddwod_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvaddwod_w_h(_1, _2); } +// CHECK-LABEL: @xvaddwod_h_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvaddwod_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvaddwod_h_b(_1, _2); } +// CHECK-LABEL: @xvaddwod_q_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvaddwod_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvaddwod_q_du(_1, _2); } +// CHECK-LABEL: @xvaddwod_d_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvaddwod_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvaddwod_d_wu(_1, _2); } +// CHECK-LABEL: @xvaddwod_w_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: 
[[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvaddwod_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvaddwod_w_hu(_1, _2); } +// CHECK-LABEL: @xvaddwod_h_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvaddwod_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvaddwod_h_bu(_1, _2); } +// CHECK-LABEL: @xvsubwod_q_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvsubwod_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvsubwod_q_d(_1, _2); } +// CHECK-LABEL: @xvsubwod_d_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvsubwod_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvsubwod_d_w(_1, _2); } +// CHECK-LABEL: @xvsubwod_w_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvsubwod_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvsubwod_w_h(_1, _2); } +// CHECK-LABEL: @xvsubwod_h_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvsubwod_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvsubwod_h_b(_1, _2); } +// CHECK-LABEL: @xvsubwod_q_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.du(<4 
x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvsubwod_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvsubwod_q_du(_1, _2); } +// CHECK-LABEL: @xvsubwod_d_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvsubwod_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvsubwod_d_wu(_1, _2); } +// CHECK-LABEL: @xvsubwod_w_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvsubwod_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvsubwod_w_hu(_1, _2); } +// CHECK-LABEL: @xvsubwod_h_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvsubwod_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvsubwod_h_bu(_1, _2); } +// CHECK-LABEL: @xvmulwod_q_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvmulwod_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvmulwod_q_d(_1, _2); } +// CHECK-LABEL: @xvmulwod_d_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvmulwod_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvmulwod_d_w(_1, _2); } +// CHECK-LABEL: @xvmulwod_w_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// 
+v8i32 xvmulwod_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvmulwod_w_h(_1, _2); } +// CHECK-LABEL: @xvmulwod_h_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvmulwod_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvmulwod_h_b(_1, _2); } +// CHECK-LABEL: @xvmulwod_q_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvmulwod_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvmulwod_q_du(_1, _2); } +// CHECK-LABEL: @xvmulwod_d_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu(<8 x i32> [[_124]], <8 x i32> [[_235]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvmulwod_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvmulwod_d_wu(_1, _2); } +// CHECK-LABEL: @xvmulwod_w_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu(<16 x i16> [[_124]], <16 x i16> [[_235]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvmulwod_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmulwod_w_hu(_1, _2); } +// CHECK-LABEL: @xvmulwod_h_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu(<32 x i8> [[_124]], <32 x i8> [[_235]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvmulwod_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmulwod_h_bu(_1, _2); } +// CHECK-LABEL: @xvaddwev_d_wu_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvaddwev_d_wu_w(v8u32 _1, v8i32 _2) { return __lasx_xvaddwev_d_wu_w(_1, _2); } +// CHECK-LABEL: @xvaddwev_w_hu_h( +// CHECK-NEXT: entry: +// 
CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvaddwev_w_hu_h(v16u16 _1, v16i16 _2) { return __lasx_xvaddwev_w_hu_h(_1, _2); } +// CHECK-LABEL: @xvaddwev_h_bu_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvaddwev_h_bu_b(v32u8 _1, v32i8 _2) { return __lasx_xvaddwev_h_bu_b(_1, _2); } +// CHECK-LABEL: @xvmulwev_d_wu_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvmulwev_d_wu_w(v8u32 _1, v8i32 _2) { return __lasx_xvmulwev_d_wu_w(_1, _2); } +// CHECK-LABEL: @xvmulwev_w_hu_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu.h(<16 x i16> [[_124]], <16 x i16> [[_235]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvmulwev_w_hu_h(v16u16 _1, v16i16 _2) { return __lasx_xvmulwev_w_hu_h(_1, _2); } +// CHECK-LABEL: @xvmulwev_h_bu_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu.b(<32 x i8> [[_124]], <32 x i8> [[_235]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvmulwev_h_bu_b(v32u8 _1, v32i8 _2) { return __lasx_xvmulwev_h_bu_b(_1, _2); } +// CHECK-LABEL: @xvaddwod_d_wu_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu.w(<8 x i32> [[_124]], <8 x i32> [[_235]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvaddwod_d_wu_w(v8u32 _1, v8i32 _2) { return __lasx_xvaddwod_d_wu_w(_1, _2); } +// CHECK-LABEL: @xvaddwod_w_hu_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa 
[[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvaddwod_w_hu_h(v16u16 _1, v16i16 _2) { return __lasx_xvaddwod_w_hu_h(_1, _2); }
+// CHECK-LABEL: @xvaddwod_h_bu_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvaddwod_h_bu_b(v32u8 _1, v32i8 _2) { return __lasx_xvaddwod_h_bu_b(_1, _2); }
+// CHECK-LABEL: @xvmulwod_d_wu_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvmulwod_d_wu_w(v8u32 _1, v8i32 _2) { return __lasx_xvmulwod_d_wu_w(_1, _2); }
+// CHECK-LABEL: @xvmulwod_w_hu_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvmulwod_w_hu_h(v16u16 _1, v16i16 _2) { return __lasx_xvmulwod_w_hu_h(_1, _2); }
+// CHECK-LABEL: @xvmulwod_h_bu_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvmulwod_h_bu_b(v32u8 _1, v32i8 _2) { return __lasx_xvmulwod_h_bu_b(_1, _2); }
+// CHECK-LABEL: @xvhaddw_q_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvhaddw_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvhaddw_q_d(_1, _2); }
+// CHECK-LABEL: @xvhaddw_qu_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.qu.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4u64 xvhaddw_qu_du(v4u64 _1, v4u64 _2) { return __lasx_xvhaddw_qu_du(_1, _2); }
+// CHECK-LABEL: @xvhsubw_q_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvhsubw_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvhsubw_q_d(_1, _2); }
+// CHECK-LABEL: @xvhsubw_qu_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.qu.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4u64 xvhsubw_qu_du(v4u64 _1, v4u64 _2) { return __lasx_xvhsubw_qu_du(_1, _2); }
+// CHECK-LABEL: @xvmaddwev_q_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]])
+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvmaddwev_q_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __lasx_xvmaddwev_q_d(_1, _2, _3); }
+// CHECK-LABEL: @xvmaddwev_d_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_346:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.w(<4 x i64> [[_1]], <8 x i32> [[_235]], <8 x i32> [[_346]])
+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvmaddwev_d_w(v4i64 _1, v8i32 _2, v8i32 _3) { return __lasx_xvmaddwev_d_w(_1, _2, _3); }
+// CHECK-LABEL: @xvmaddwev_w_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.h(<8 x i32> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]])
+// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvmaddwev_w_h(v8i32 _1, v16i16 _2, v16i16 _3) { return __lasx_xvmaddwev_w_h(_1, _2, _3); }
+// CHECK-LABEL: @xvmaddwev_h_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.b(<16 x i16> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]])
+// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvmaddwev_h_b(v16i16 _1, v32i8 _2, v32i8 _3) { return __lasx_xvmaddwev_h_b(_1, _2, _3); }
+// CHECK-LABEL: @xvmaddwev_q_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]])
+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4u64 xvmaddwev_q_du(v4u64 _1, v4u64 _2, v4u64 _3) { return __lasx_xvmaddwev_q_du(_1, _2, _3); }
+// CHECK-LABEL: @xvmaddwev_d_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_346:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu(<4 x i64> [[_1]], <8 x i32> [[_235]], <8 x i32> [[_346]])
+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4u64 xvmaddwev_d_wu(v4u64 _1, v8u32 _2, v8u32 _3) { return __lasx_xvmaddwev_d_wu(_1, _2, _3); }
+// CHECK-LABEL: @xvmaddwev_w_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu(<8 x i32> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]])
+// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8u32 xvmaddwev_w_hu(v8u32 _1, v16u16 _2, v16u16 _3) { return __lasx_xvmaddwev_w_hu(_1, _2, _3); }
+// CHECK-LABEL: @xvmaddwev_h_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu(<16 x i16> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]])
+// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16u16 xvmaddwev_h_bu(v16u16 _1, v32u8 _2, v32u8 _3) { return __lasx_xvmaddwev_h_bu(_1, _2, _3); }
+// CHECK-LABEL: @xvmaddwod_q_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]])
+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvmaddwod_q_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __lasx_xvmaddwod_q_d(_1, _2, _3); }
+// CHECK-LABEL: @xvmaddwod_d_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_346:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.w(<4 x i64> [[_1]], <8 x i32> [[_235]], <8 x i32> [[_346]])
+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvmaddwod_d_w(v4i64 _1, v8i32 _2, v8i32 _3) { return __lasx_xvmaddwod_d_w(_1, _2, _3); }
+// CHECK-LABEL: @xvmaddwod_w_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.h(<8 x i32> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]])
+// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvmaddwod_w_h(v8i32 _1, v16i16 _2, v16i16 _3) { return __lasx_xvmaddwod_w_h(_1, _2, _3); }
+// CHECK-LABEL: @xvmaddwod_h_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.b(<16 x i16> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]])
+// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvmaddwod_h_b(v16i16 _1, v32i8 _2, v32i8 _3) { return __lasx_xvmaddwod_h_b(_1, _2, _3); }
+// CHECK-LABEL: @xvmaddwod_q_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]])
+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4u64 xvmaddwod_q_du(v4u64 _1, v4u64 _2, v4u64 _3) { return __lasx_xvmaddwod_q_du(_1, _2, _3); }
+// CHECK-LABEL: @xvmaddwod_d_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_346:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu(<4 x i64> [[_1]], <8 x i32> [[_235]], <8 x i32> [[_346]])
+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4u64 xvmaddwod_d_wu(v4u64 _1, v8u32 _2, v8u32 _3) { return __lasx_xvmaddwod_d_wu(_1, _2, _3); }
+// CHECK-LABEL: @xvmaddwod_w_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu(<8 x i32> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]])
+// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8u32 xvmaddwod_w_hu(v8u32 _1, v16u16 _2, v16u16 _3) { return __lasx_xvmaddwod_w_hu(_1, _2, _3); }
+// CHECK-LABEL: @xvmaddwod_h_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu(<16 x i16> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]])
+// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16u16 xvmaddwod_h_bu(v16u16 _1, v32u8 _2, v32u8 _3) { return __lasx_xvmaddwod_h_bu(_1, _2, _3); }
+// CHECK-LABEL: @xvmaddwev_q_du_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]])
+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvmaddwev_q_du_d(v4i64 _1, v4u64 _2, v4i64 _3) { return __lasx_xvmaddwev_q_du_d(_1, _2, _3); }
+// CHECK-LABEL: @xvmaddwev_d_wu_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_346:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu.w(<4 x i64> [[_1]], <8 x i32> [[_235]], <8 x i32> [[_346]])
+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvmaddwev_d_wu_w(v4i64 _1, v8u32 _2, v8i32 _3) { return __lasx_xvmaddwev_d_wu_w(_1, _2, _3); }
+// CHECK-LABEL: @xvmaddwev_w_hu_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu.h(<8 x i32> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]])
+// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvmaddwev_w_hu_h(v8i32 _1, v16u16 _2, v16i16 _3) { return __lasx_xvmaddwev_w_hu_h(_1, _2, _3); }
+// CHECK-LABEL: @xvmaddwev_h_bu_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu.b(<16 x i16> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]])
+// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvmaddwev_h_bu_b(v16i16 _1, v32u8 _2, v32i8 _3) { return __lasx_xvmaddwev_h_bu_b(_1, _2, _3); }
+// CHECK-LABEL: @xvmaddwod_q_du_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]])
+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvmaddwod_q_du_d(v4i64 _1, v4u64 _2, v4i64 _3) { return __lasx_xvmaddwod_q_du_d(_1, _2, _3); }
+// CHECK-LABEL: @xvmaddwod_d_wu_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_346:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu.w(<4 x i64> [[_1]], <8 x i32> [[_235]], <8 x i32> [[_346]])
+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvmaddwod_d_wu_w(v4i64 _1, v8u32 _2, v8i32 _3) { return __lasx_xvmaddwod_d_wu_w(_1, _2, _3); }
+// CHECK-LABEL: @xvmaddwod_w_hu_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_136:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_247:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_358:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu.h(<8 x i32> [[_136]], <16 x i16> [[_247]], <16 x i16> [[_358]])
+// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvmaddwod_w_hu_h(v8i32 _1, v16u16 _2, v16i16 _3) { return __lasx_xvmaddwod_w_hu_h(_1, _2, _3); }
+// CHECK-LABEL: @xvmaddwod_h_bu_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_136:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_247:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_358:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu.b(<16 x i16> [[_136]], <32 x i8> [[_247]], <32 x i8> [[_358]])
+// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvmaddwod_h_bu_b(v16i16 _1, v32u8 _2, v32i8 _3) { return __lasx_xvmaddwod_h_bu_b(_1, _2, _3); }
+// CHECK-LABEL: @xvrotr_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrotr.b(<32 x i8> [[_124]], <32 x i8> [[_235]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvrotr_b(v32i8 _1, v32i8 _2) { return __lasx_xvrotr_b(_1, _2); }
+// CHECK-LABEL: @xvrotr_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrotr.h(<16 x i16> [[_124]], <16 x i16> [[_235]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvrotr_h(v16i16 _1, v16i16 _2) { return __lasx_xvrotr_h(_1, _2); }
+// CHECK-LABEL: @xvrotr_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_124:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_235:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrotr.w(<8 x i32> [[_124]], <8 x i32> [[_235]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvrotr_w(v8i32 _1, v8i32 _2) { return __lasx_xvrotr_w(_1, _2); }
+// CHECK-LABEL: @xvrotr_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrotr.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvrotr_d(v4i64 _1, v4i64 _2) { return __lasx_xvrotr_d(_1, _2); }
+// CHECK-LABEL: @xvadd_q(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadd.q(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvadd_q(v4i64 _1, v4i64 _2) { return __lasx_xvadd_q(_1, _2); }
+// CHECK-LABEL: @xvsub_q(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsub.q(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvsub_q(v4i64 _1, v4i64 _2) { return __lasx_xvsub_q(_1, _2); }
+// CHECK-LABEL: @xvaddwev_q_du_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvaddwev_q_du_d(v4u64 _1, v4i64 _2) { return __lasx_xvaddwev_q_du_d(_1, _2); }
+// CHECK-LABEL: @xvaddwod_q_du_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvaddwod_q_du_d(v4u64 _1, v4i64 _2) { return __lasx_xvaddwod_q_du_d(_1, _2); }
+// CHECK-LABEL: @xvmulwev_q_du_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvmulwev_q_du_d(v4u64 _1, v4i64 _2) { return __lasx_xvmulwev_q_du_d(_1, _2); }
+// CHECK-LABEL: @xvmulwod_q_du_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvmulwod_q_du_d(v4u64 _1, v4i64 _2) { return __lasx_xvmulwod_q_du_d(_1, _2); }
+// CHECK-LABEL: @xvmskgez_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmskgez.b(<32 x i8> [[_112]])
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvmskgez_b(v32i8 _1) { return __lasx_xvmskgez_b(_1); }
+// CHECK-LABEL: @xvmsknz_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmsknz.b(<32 x i8> [[_112]])
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvmsknz_b(v32i8 _1) { return __lasx_xvmsknz_b(_1); }
+// CHECK-LABEL: @xvexth_h_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvexth.h.b(<32 x i8> [[_112]])
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvexth_h_b(v32i8 _1) { return __lasx_xvexth_h_b(_1); }
+// CHECK-LABEL: @xvexth_w_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvexth.w.h(<16 x i16> [[_112]])
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvexth_w_h(v16i16 _1) { return __lasx_xvexth_w_h(_1); }
+// CHECK-LABEL: @xvexth_d_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.d.w(<8 x i32> [[_112]])
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvexth_d_w(v8i32 _1) { return __lasx_xvexth_d_w(_1); }
+// CHECK-LABEL: @xvexth_q_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.q.d(<4 x i64> [[_1]])
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvexth_q_d(v4i64 _1) { return __lasx_xvexth_q_d(_1); }
+// CHECK-LABEL: @xvexth_hu_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_112:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvexth.hu.bu(<32 x i8> [[_112]])
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16u16 xvexth_hu_bu(v32u8 _1) { return __lasx_xvexth_hu_bu(_1); }
+// CHECK-LABEL: @xvexth_wu_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_112:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvexth.wu.hu(<16 x i16> [[_112]])
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8u32 xvexth_wu_hu(v16u16 _1) { return __lasx_xvexth_wu_hu(_1); }
+// CHECK-LABEL: @xvexth_du_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_112:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.du.wu(<8 x i32> [[_112]])
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4u64 xvexth_du_wu(v8u32 _1) { return __lasx_xvexth_du_wu(_1); }
+// CHECK-LABEL: @xvexth_qu_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.qu.du(<4 x i64> [[_1]])
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4u64 xvexth_qu_du(v4u64 _1) { return __lasx_xvexth_qu_du(_1); }
+// CHECK-LABEL: @xvrotri_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrotri.b(<32 x i8> [[_1]], i32 1)
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvrotri_b(v32i8 _1) { return __lasx_xvrotri_b(_1, 1); }
+// CHECK-LABEL: @xvrotri_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrotri.h(<16 x i16> [[_1]], i32 1)
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvrotri_h(v16i16 _1) { return __lasx_xvrotri_h(_1, 1); }
+// CHECK-LABEL: @xvrotri_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrotri.w(<8 x i32> [[_1]], i32 1)
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvrotri_w(v8i32 _1) { return __lasx_xvrotri_w(_1, 1); }
+// CHECK-LABEL: @xvrotri_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrotri.d(<4 x i64> [[_1]], i32 1)
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvrotri_d(v4i64 _1) { return __lasx_xvrotri_d(_1, 1); }
+// CHECK-LABEL: @xvextl_q_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextl.q.d(<4 x i64> [[_1]])
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvextl_q_d(v4i64 _1) { return __lasx_xvextl_q_d(_1); }
+// CHECK-LABEL: @xvsrlni_b_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1)
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvsrlni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvsrlni_b_h(_1, _2, 1); }
+// CHECK-LABEL: @xvsrlni_h_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1)
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvsrlni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvsrlni_h_w(_1, _2, 1); }
+// CHECK-LABEL: @xvsrlni_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1)
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvsrlni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvsrlni_w_d(_1, _2, 1); }
+// CHECK-LABEL: @xvsrlni_d_q(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1)
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvsrlni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvsrlni_d_q(_1, _2, 1); }
+// CHECK-LABEL: @xvsrlrni_b_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlrni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1)
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvsrlrni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvsrlrni_b_h(_1, _2, 1); }
+// CHECK-LABEL: @xvsrlrni_h_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlrni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1)
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvsrlrni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvsrlrni_h_w(_1, _2, 1); }
+// CHECK-LABEL: @xvsrlrni_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlrni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1)
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvsrlrni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvsrlrni_w_d(_1, _2, 1); }
+// CHECK-LABEL: @xvsrlrni_d_q(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlrni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1)
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvsrlrni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvsrlrni_d_q(_1, _2, 1); }
+// CHECK-LABEL: @xvssrlni_b_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1)
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvssrlni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvssrlni_b_h(_1, _2, 1); }
+// CHECK-LABEL: @xvssrlni_h_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1)
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvssrlni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvssrlni_h_w(_1, _2, 1); }
+// CHECK-LABEL: @xvssrlni_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1)
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvssrlni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvssrlni_w_d(_1, _2, 1); }
+// CHECK-LABEL: @xvssrlni_d_q(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1)
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvssrlni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvssrlni_d_q(_1, _2, 1); }
+// CHECK-LABEL: @xvssrlni_bu_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlni.bu.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1)
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32u8 xvssrlni_bu_h(v32u8 _1, v32i8 _2) { return __lasx_xvssrlni_bu_h(_1, _2, 1); }
+// CHECK-LABEL: @xvssrlni_hu_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlni.hu.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1)
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16u16 xvssrlni_hu_w(v16u16 _1, v16i16 _2) { return __lasx_xvssrlni_hu_w(_1, _2, 1); }
+// CHECK-LABEL: @xvssrlni_wu_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlni.wu.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1)
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8u32 xvssrlni_wu_d(v8u32 _1, v8i32 _2) { return __lasx_xvssrlni_wu_d(_1, _2, 1); }
+// CHECK-LABEL: @xvssrlni_du_q(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlni.du.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1)
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4u64 xvssrlni_du_q(v4u64 _1, v4i64 _2) { return __lasx_xvssrlni_du_q(_1, _2, 1); }
+// CHECK-LABEL: @xvssrlrni_b_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1)
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvssrlrni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvssrlrni_b_h(_1, _2, 1); }
+// CHECK-LABEL: @xvssrlrni_h_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1)
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvssrlrni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvssrlrni_h_w(_1, _2, 1); }
+// CHECK-LABEL: @xvssrlrni_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1)
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvssrlrni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvssrlrni_w_d(_1, _2, 1); }
+// CHECK-LABEL: @xvssrlrni_d_q(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1)
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvssrlrni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvssrlrni_d_q(_1, _2, 1); }
+// CHECK-LABEL: @xvssrlrni_bu_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.bu.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1)
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32u8 xvssrlrni_bu_h(v32u8 _1, v32i8 _2) { return __lasx_xvssrlrni_bu_h(_1, _2, 1); }
+// CHECK-LABEL: @xvssrlrni_hu_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.hu.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1)
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16u16 xvssrlrni_hu_w(v16u16 _1, v16i16 _2) { return __lasx_xvssrlrni_hu_w(_1, _2, 1); }
+// CHECK-LABEL: @xvssrlrni_wu_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.wu.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1)
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8u32 xvssrlrni_wu_d(v8u32 _1, v8i32 _2) { return __lasx_xvssrlrni_wu_d(_1, _2, 1); }
+// CHECK-LABEL: @xvssrlrni_du_q(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.du.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1)
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4u64 xvssrlrni_du_q(v4u64 _1, v4i64 _2) { return __lasx_xvssrlrni_du_q(_1, _2, 1); }
+// CHECK-LABEL: @xvsrani_b_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrani.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1)
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvsrani_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvsrani_b_h(_1, _2, 1); }
+// CHECK-LABEL: @xvsrani_h_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrani.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1)
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvsrani_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvsrani_h_w(_1, _2, 1); }
+// CHECK-LABEL: @xvsrani_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrani.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1)
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvsrani_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvsrani_w_d(_1, _2, 1); }
+// CHECK-LABEL: @xvsrani_d_q(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrani.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1)
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvsrani_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvsrani_d_q(_1, _2, 1); }
+// CHECK-LABEL: @xvsrarni_b_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrarni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1)
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvsrarni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvsrarni_b_h(_1, _2, 1); }
+// CHECK-LABEL: @xvsrarni_h_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrarni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1)
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvsrarni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvsrarni_h_w(_1, _2, 1); }
+// CHECK-LABEL: @xvsrarni_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrarni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1)
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvsrarni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvsrarni_w_d(_1, _2, 1); }
+// CHECK-LABEL: @xvsrarni_d_q(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrarni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1)
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvsrarni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvsrarni_d_q(_1, _2, 1); }
+// CHECK-LABEL: @xvssrani_b_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrani.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1)
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvssrani_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvssrani_b_h(_1, _2, 1); }
+// CHECK-LABEL: @xvssrani_h_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrani.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1)
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvssrani_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvssrani_h_w(_1, _2, 1); }
+// CHECK-LABEL: @xvssrani_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrani.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1)
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvssrani_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvssrani_w_d(_1, _2, 1); }
+// CHECK-LABEL: @xvssrani_d_q(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrani.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1)
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvssrani_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvssrani_d_q(_1, _2, 1); }
+// CHECK-LABEL: @xvssrani_bu_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrani.bu.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1)
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32u8 xvssrani_bu_h(v32u8 _1, v32i8 _2) { return __lasx_xvssrani_bu_h(_1, _2, 1); }
+// CHECK-LABEL: @xvssrani_hu_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrani.hu.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1)
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16u16 xvssrani_hu_w(v16u16 _1, v16i16 _2) { return __lasx_xvssrani_hu_w(_1, _2, 1); }
+// CHECK-LABEL: @xvssrani_wu_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrani.wu.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1)
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8u32 xvssrani_wu_d(v8u32 _1, v8i32 _2) { return __lasx_xvssrani_wu_d(_1, _2, 1); }
+// CHECK-LABEL: @xvssrani_du_q(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrani.du.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1)
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4u64 xvssrani_du_q(v4u64 _1, v4i64 _2) { return __lasx_xvssrani_du_q(_1, _2, 1); }
+// CHECK-LABEL: @xvssrarni_b_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1)
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvssrarni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvssrarni_b_h(_1, _2, 1); }
+// CHECK-LABEL: @xvssrarni_h_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1)
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvssrarni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvssrarni_h_w(_1, _2, 1); }
+// CHECK-LABEL: @xvssrarni_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1)
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvssrarni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvssrarni_w_d(_1, _2, 1); }
+// CHECK-LABEL: @xvssrarni_d_q(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrarni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1)
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvssrarni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvssrarni_d_q(_1, _2, 1); }
+// CHECK-LABEL: @xvssrarni_bu_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarni.bu.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1)
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32u8 xvssrarni_bu_h(v32u8 _1, v32i8 _2) { return __lasx_xvssrarni_bu_h(_1, _2, 1); }
+// CHECK-LABEL: @xvssrarni_hu_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarni.hu.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1)
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16u16 xvssrarni_hu_w(v16u16 _1, v16i16 _2) { return __lasx_xvssrarni_hu_w(_1, _2, 1); }
+// CHECK-LABEL: @xvssrarni_wu_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarni.wu.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1)
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8u32 xvssrarni_wu_d(v8u32 _1, v8i32 _2) { return __lasx_xvssrarni_wu_d(_1, _2, 1); }
+// CHECK-LABEL: @xvssrarni_du_q(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrarni.du.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1)
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4u64 xvssrarni_du_q(v4u64 _1, v4i64 _2) { return __lasx_xvssrarni_du_q(_1, _2, 1); }
+// CHECK-LABEL: @xbnz_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.b(<32 x i8> [[_1]])
+// CHECK-NEXT: ret i32 [[TMP1]]
+//
+int xbnz_b(v32u8 _1) { return __lasx_xbnz_b(_1); }
+// CHECK-LABEL: @xbnz_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.d(<4 x i64> [[_1]])
+// CHECK-NEXT: ret i32 [[TMP1]]
+//
+int xbnz_d(v4u64 _1) { return __lasx_xbnz_d(_1); }
+// CHECK-LABEL: @xbnz_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.h(<16 x i16> [[_1]])
+// CHECK-NEXT: ret i32 [[TMP1]]
+//
+int xbnz_h(v16u16 _1) { return __lasx_xbnz_h(_1); }
+// CHECK-LABEL: @xbnz_v(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.v(<32 x i8> [[_1]])
+// CHECK-NEXT: ret i32 [[TMP1]]
+//
+int xbnz_v(v32u8 _1) { return __lasx_xbnz_v(_1); }
+// CHECK-LABEL: @xbnz_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.w(<8 x i32> [[_1]])
+// CHECK-NEXT: ret i32 [[TMP1]]
+//
+int xbnz_w(v8u32 _1) { return __lasx_xbnz_w(_1); }
+// CHECK-LABEL: @xbz_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.b(<32 x i8> [[_1]])
+// CHECK-NEXT: ret i32 [[TMP1]]
+//
+int xbz_b(v32u8 _1) { return __lasx_xbz_b(_1); }
+// CHECK-LABEL: @xbz_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.d(<4 x i64> [[_1]])
+// CHECK-NEXT: ret i32 [[TMP1]]
+//
+int xbz_d(v4u64 _1) { return __lasx_xbz_d(_1); }
+// CHECK-LABEL: @xbz_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.h(<16 x i16> [[_1]])
+// CHECK-NEXT: ret i32 [[TMP1]]
+//
+int xbz_h(v16u16 _1) { return __lasx_xbz_h(_1); }
+// CHECK-LABEL: @xbz_v(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.v(<32 x i8> [[_1]])
+// CHECK-NEXT: ret i32 [[TMP1]]
+//
+int xbz_v(v32u8 _1) { return __lasx_xbz_v(_1); }
+// CHECK-LABEL: @xbz_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.w(<8 x i32> [[_1]])
+// CHECK-NEXT: ret i32 [[TMP1]]
+//
+int xbz_w(v8u32 _1) { return __lasx_xbz_w(_1); }
+// CHECK-LABEL: @xvfcmp_caf_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.caf.d(<4 x double> [[_1]], <4 x double> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvfcmp_caf_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_caf_d(_1, _2); }
+// CHECK-LABEL: @xvfcmp_caf_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.caf.s(<8 x float> [[_1]], <8 x float> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvfcmp_caf_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_caf_s(_1, _2); }
+// CHECK-LABEL: @xvfcmp_ceq_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.ceq.d(<4 x double> [[_1]], <4 x double> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvfcmp_ceq_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_ceq_d(_1, _2); }
+// CHECK-LABEL: @xvfcmp_ceq_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.ceq.s(<8 x float> [[_1]], <8 x float> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvfcmp_ceq_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_ceq_s(_1, _2); }
+// CHECK-LABEL: @xvfcmp_cle_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cle.d(<4 x double> [[_1]], <4 x double> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvfcmp_cle_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cle_d(_1, _2); }
+// CHECK-LABEL: @xvfcmp_cle_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cle.s(<8 x float> [[_1]], <8 x float> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvfcmp_cle_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cle_s(_1, _2); }
+// CHECK-LABEL: @xvfcmp_clt_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.clt.d(<4 x double> [[_1]], <4 x double> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvfcmp_clt_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_clt_d(_1, _2); }
+// CHECK-LABEL: @xvfcmp_clt_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.clt.s(<8 x float> [[_1]], <8 x float> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvfcmp_clt_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_clt_s(_1, _2); }
+// CHECK-LABEL: @xvfcmp_cne_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cne.d(<4 x double> [[_1]], <4 x double> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvfcmp_cne_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cne_d(_1, _2); }
+// CHECK-LABEL: @xvfcmp_cne_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cne.s(<8 x float> [[_1]], <8 x float> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvfcmp_cne_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cne_s(_1, _2); }
+// CHECK-LABEL: @xvfcmp_cor_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cor.d(<4 x double> [[_1]], <4 x double> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvfcmp_cor_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cor_d(_1, _2); }
+// CHECK-LABEL: @xvfcmp_cor_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cor.s(<8 x float> [[_1]], <8 x float> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvfcmp_cor_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cor_s(_1, _2); }
+// CHECK-LABEL: @xvfcmp_cueq_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cueq.d(<4 x double> [[_1]], <4 x double> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvfcmp_cueq_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cueq_d(_1, _2); }
+// CHECK-LABEL: @xvfcmp_cueq_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cueq.s(<8 x float> [[_1]], <8 x float> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvfcmp_cueq_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cueq_s(_1, _2); }
+// CHECK-LABEL: @xvfcmp_cule_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cule.d(<4 x double> [[_1]], <4 x double> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvfcmp_cule_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cule_d(_1, _2); }
+// CHECK-LABEL: @xvfcmp_cule_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cule.s(<8 x float> [[_1]], <8 x float> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvfcmp_cule_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cule_s(_1, _2); }
+// CHECK-LABEL: @xvfcmp_cult_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cult.d(<4 x double> [[_1]], <4 x double> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvfcmp_cult_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cult_d(_1, _2); }
+// CHECK-LABEL: @xvfcmp_cult_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cult.s(<8 x float> [[_1]], <8 x float> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvfcmp_cult_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cult_s(_1, _2); }
+// CHECK-LABEL: @xvfcmp_cun_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+//
CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cun.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvfcmp_cun_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cun_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_cune_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cune.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvfcmp_cune_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cune_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_cune_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cune.s(<8 x float> [[_1]], <8 x float> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvfcmp_cune_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cune_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_cun_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cun.s(<8 x float> [[_1]], <8 x float> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvfcmp_cun_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cun_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_saf_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.saf.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvfcmp_saf_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_saf_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_saf_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.saf.s(<8 x float> [[_1]], <8 x float> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvfcmp_saf_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_saf_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_seq_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> 
@llvm.loongarch.lasx.xvfcmp.seq.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvfcmp_seq_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_seq_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_seq_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.seq.s(<8 x float> [[_1]], <8 x float> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvfcmp_seq_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_seq_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_sle_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sle.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvfcmp_sle_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sle_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_sle_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sle.s(<8 x float> [[_1]], <8 x float> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvfcmp_sle_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sle_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_slt_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.slt.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvfcmp_slt_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_slt_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_slt_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.slt.s(<8 x float> [[_1]], <8 x float> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvfcmp_slt_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_slt_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_sne_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sne.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa 
[[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvfcmp_sne_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sne_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_sne_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sne.s(<8 x float> [[_1]], <8 x float> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvfcmp_sne_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sne_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_sor_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sor.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvfcmp_sor_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sor_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_sor_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sor.s(<8 x float> [[_1]], <8 x float> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvfcmp_sor_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sor_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_sueq_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sueq.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvfcmp_sueq_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sueq_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_sueq_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sueq.s(<8 x float> [[_1]], <8 x float> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvfcmp_sueq_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sueq_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_sule_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sule.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvfcmp_sule_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sule_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_sule_s( 
+// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sule.s(<8 x float> [[_1]], <8 x float> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvfcmp_sule_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sule_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_sult_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sult.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvfcmp_sult_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sult_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_sult_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sult.s(<8 x float> [[_1]], <8 x float> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvfcmp_sult_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sult_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_sun_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sun.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvfcmp_sun_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sun_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_sune_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sune.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvfcmp_sune_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sune_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_sune_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sune.s(<8 x float> [[_1]], <8 x float> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvfcmp_sune_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sune_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_sun_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = 
load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sun.s(<8 x float> [[_1]], <8 x float> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvfcmp_sun_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sun_s(_1, _2); } +// CHECK-LABEL: @xvpickve_d_f( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvpickve.d.f(<4 x double> [[_1]], i32 1) +// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4f64 xvpickve_d_f(v4f64 _1) { return __lasx_xvpickve_d_f(_1, 1); } +// CHECK-LABEL: @xvpickve_w_f( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvpickve.w.f(<8 x float> [[_1]], i32 1) +// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8f32 xvpickve_w_f(v8f32 _1) { return __lasx_xvpickve_w_f(_1, 1); } +// CHECK-LABEL: @xvrepli_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrepli.b(i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvrepli_b() { return __lasx_xvrepli_b(1); } +// CHECK-LABEL: @xvrepli_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrepli.d(i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvrepli_d() { return __lasx_xvrepli_d(1); } +// CHECK-LABEL: @xvrepli_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrepli.h(i32 1) +// CHECK-NEXT: store <16 x i16> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvrepli_h() { return __lasx_xvrepli_h(1); } +// CHECK-LABEL: @xvrepli_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrepli.w(i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvrepli_w() { return __lasx_xvrepli_w(1); } diff --git a/clang/test/CodeGen/LoongArch/lasx/builtin-error.c b/clang/test/CodeGen/LoongArch/lasx/builtin-error.c new file mode 100644 index 0000000000000000000000000000000000000000..724484465769e0e2da46fc738a1e003c4f7d1f7f --- /dev/null +++ b/clang/test/CodeGen/LoongArch/lasx/builtin-error.c @@ -0,0 +1,1392 @@ +// RUN: %clang_cc1 -triple loongarch64 -target-feature +lasx -verify %s + +typedef signed char v32i8 __attribute__((vector_size(32), aligned(32))); +typedef signed char v32i8_b __attribute__((vector_size(32), aligned(1))); +typedef unsigned char v32u8 __attribute__((vector_size(32), aligned(32))); +typedef unsigned char v32u8_b __attribute__((vector_size(32), aligned(1))); +typedef short v16i16 __attribute__((vector_size(32), aligned(32))); +typedef short v16i16_h __attribute__((vector_size(32), aligned(2))); +typedef unsigned short v16u16 __attribute__((vector_size(32), aligned(32))); +typedef unsigned short v16u16_h __attribute__((vector_size(32), aligned(2))); +typedef int 
v8i32 __attribute__((vector_size(32), aligned(32))); +typedef int v8i32_w __attribute__((vector_size(32), aligned(4))); +typedef unsigned int v8u32 __attribute__((vector_size(32), aligned(32))); +typedef unsigned int v8u32_w __attribute__((vector_size(32), aligned(4))); +typedef long long v4i64 __attribute__((vector_size(32), aligned(32))); +typedef long long v4i64_d __attribute__((vector_size(32), aligned(8))); +typedef unsigned long long v4u64 __attribute__((vector_size(32), aligned(32))); +typedef unsigned long long v4u64_d __attribute__((vector_size(32), aligned(8))); +typedef float v8f32 __attribute__((vector_size(32), aligned(32))); +typedef float v8f32_w __attribute__((vector_size(32), aligned(4))); +typedef double v4f64 __attribute__((vector_size(32), aligned(32))); +typedef double v4f64_d __attribute__((vector_size(32), aligned(8))); + +v32i8 xvslli_b(v32i8 _1, int var) { + v32i8 res = __builtin_lasx_xvslli_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvslli_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvslli_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvslli_b' must be a constant integer}} + return res; +} + +v16i16 xvslli_h(v16i16 _1, int var) { + v16i16 res = __builtin_lasx_xvslli_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvslli_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvslli_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvslli_h' must be a constant integer}} + return res; +} + +v8i32 xvslli_w(v8i32 _1, int var) { + v8i32 res = __builtin_lasx_xvslli_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvslli_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvslli_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvslli_w' must be a constant integer}} + return res; +} + +v4i64 xvslli_d(v4i64 _1, int var) { + v4i64 res = __builtin_lasx_xvslli_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvslli_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvslli_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvslli_d' must be a constant integer}} + return res; +} + +v32i8 xvsrai_b(v32i8 _1, int var) { + v32i8 res = __builtin_lasx_xvsrai_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvsrai_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvsrai_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrai_b' must be a constant integer}} + return res; +} + +v16i16 xvsrai_h(v16i16 _1, int var) { + v16i16 res = __builtin_lasx_xvsrai_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvsrai_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvsrai_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrai_h' must be a constant integer}} + return res; +} + +v8i32 xvsrai_w(v8i32 _1, int var) { + v8i32 res = __builtin_lasx_xvsrai_w(_1, -1); // expected-error 
{{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvsrai_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvsrai_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrai_w' must be a constant integer}} + return res; +} + +v4i64 xvsrai_d(v4i64 _1, int var) { + v4i64 res = __builtin_lasx_xvsrai_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvsrai_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvsrai_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrai_d' must be a constant integer}} + return res; +} + +v32i8 xvsrari_b(v32i8 _1, int var) { + v32i8 res = __builtin_lasx_xvsrari_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvsrari_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvsrari_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrari_b' must be a constant integer}} + return res; +} + +v16i16 xvsrari_h(v16i16 _1, int var) { + v16i16 res = __builtin_lasx_xvsrari_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvsrari_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvsrari_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrari_h' must be a constant integer}} + return res; +} + +v8i32 xvsrari_w(v8i32 _1, int var) { + v8i32 res = __builtin_lasx_xvsrari_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvsrari_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvsrari_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrari_w' must be a constant integer}} + return res; +} + +v4i64 xvsrari_d(v4i64 _1, int var) { + v4i64 res = __builtin_lasx_xvsrari_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvsrari_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvsrari_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrari_d' must be a constant integer}} + return res; +} + +v32i8 xvsrli_b(v32i8 _1, int var) { + v32i8 res = __builtin_lasx_xvsrli_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvsrli_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvsrli_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrli_b' must be a constant integer}} + return res; +} + +v16i16 xvsrli_h(v16i16 _1, int var) { + v16i16 res = __builtin_lasx_xvsrli_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvsrli_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvsrli_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrli_h' must be a constant integer}} + return res; +} + +v8i32 xvsrli_w(v8i32 _1, int var) { + v8i32 res = __builtin_lasx_xvsrli_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res 
|= __builtin_lasx_xvsrli_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvsrli_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrli_w' must be a constant integer}} + return res; +} + +v4i64 xvsrli_d(v4i64 _1, int var) { + v4i64 res = __builtin_lasx_xvsrli_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvsrli_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvsrli_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrli_d' must be a constant integer}} + return res; +} + +v32i8 xvsrlri_b(v32i8 _1, int var) { + v32i8 res = __builtin_lasx_xvsrlri_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvsrlri_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvsrlri_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrlri_b' must be a constant integer}} + return res; +} + +v16i16 xvsrlri_h(v16i16 _1, int var) { + v16i16 res = __builtin_lasx_xvsrlri_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvsrlri_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvsrlri_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrlri_h' must be a constant integer}} + return res; +} + +v8i32 xvsrlri_w(v8i32 _1, int var) { + v8i32 res = __builtin_lasx_xvsrlri_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvsrlri_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvsrlri_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrlri_w' must be a constant integer}} + return res; +} + +v4i64 xvsrlri_d(v4i64 _1, int var) { + v4i64 res = __builtin_lasx_xvsrlri_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvsrlri_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvsrlri_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrlri_d' must be a constant integer}} + return res; +} + +v32u8 xvbitclri_b(v32u8 _1, int var) { + v32u8 res = __builtin_lasx_xvbitclri_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvbitclri_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvbitclri_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitclri_b' must be a constant integer}} + return res; +} + +v16u16 xvbitclri_h(v16u16 _1, int var) { + v16u16 res = __builtin_lasx_xvbitclri_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvbitclri_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvbitclri_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitclri_h' must be a constant integer}} + return res; +} + +v8u32 xvbitclri_w(v8u32 _1, int var) { + v8u32 res = __builtin_lasx_xvbitclri_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvbitclri_w(_1, 
32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvbitclri_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitclri_w' must be a constant integer}} + return res; +} + +v4u64 xvbitclri_d(v4u64 _1, int var) { + v4u64 res = __builtin_lasx_xvbitclri_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvbitclri_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvbitclri_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitclri_d' must be a constant integer}} + return res; +} + +v32u8 xvbitseti_b(v32u8 _1, int var) { + v32u8 res = __builtin_lasx_xvbitseti_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvbitseti_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvbitseti_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitseti_b' must be a constant integer}} + return res; +} + +v16u16 xvbitseti_h(v16u16 _1, int var) { + v16u16 res = __builtin_lasx_xvbitseti_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvbitseti_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvbitseti_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitseti_h' must be a constant integer}} + return res; +} + +v8u32 xvbitseti_w(v8u32 _1, int var) { + v8u32 res = __builtin_lasx_xvbitseti_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvbitseti_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvbitseti_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitseti_w' must be a constant integer}} + return res; +} + +v4u64 xvbitseti_d(v4u64 _1, int var) { + v4u64 res = __builtin_lasx_xvbitseti_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvbitseti_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvbitseti_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitseti_d' must be a constant integer}} + return res; +} + +v32u8 xvbitrevi_b(v32u8 _1, int var) { + v32u8 res = __builtin_lasx_xvbitrevi_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvbitrevi_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvbitrevi_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitrevi_b' must be a constant integer}} + return res; +} + +v16u16 xvbitrevi_h(v16u16 _1, int var) { + v16u16 res = __builtin_lasx_xvbitrevi_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvbitrevi_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvbitrevi_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitrevi_h' must be a constant integer}} + return res; +} + +v8u32 xvbitrevi_w(v8u32 _1, int var) { + v8u32 res = __builtin_lasx_xvbitrevi_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= 
__builtin_lasx_xvbitrevi_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvbitrevi_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitrevi_w' must be a constant integer}} + return res; +} + +v4u64 xvbitrevi_d(v4u64 _1, int var) { + v4u64 res = __builtin_lasx_xvbitrevi_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvbitrevi_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvbitrevi_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitrevi_d' must be a constant integer}} + return res; +} + +v32i8 xvaddi_bu(v32i8 _1, int var) { + v32i8 res = __builtin_lasx_xvaddi_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvaddi_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvaddi_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvaddi_bu' must be a constant integer}} + return res; +} + +v16i16 xvaddi_hu(v16i16 _1, int var) { + v16i16 res = __builtin_lasx_xvaddi_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvaddi_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvaddi_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvaddi_hu' must be a constant integer}} + return res; +} + +v8i32 xvaddi_wu(v8i32 _1, int var) { + v8i32 res = __builtin_lasx_xvaddi_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvaddi_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvaddi_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvaddi_wu' must be a constant integer}} + return res; +} + +v4i64 xvaddi_du(v4i64 _1, int var) { + v4i64 res = __builtin_lasx_xvaddi_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvaddi_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvaddi_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvaddi_du' must be a constant integer}} + return res; +} + +v32i8 xvsubi_bu(v32i8 _1, int var) { + v32i8 res = __builtin_lasx_xvsubi_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvsubi_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvsubi_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsubi_bu' must be a constant integer}} + return res; +} + +v16i16 xvsubi_hu(v16i16 _1, int var) { + v16i16 res = __builtin_lasx_xvsubi_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvsubi_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvsubi_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsubi_hu' must be a constant integer}} + return res; +} + +v8i32 xvsubi_wu(v8i32 _1, int var) { + v8i32 res = __builtin_lasx_xvsubi_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvsubi_wu(_1, 
32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvsubi_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsubi_wu' must be a constant integer}} + return res; +} + +v4i64 xvsubi_du(v4i64 _1, int var) { + v4i64 res = __builtin_lasx_xvsubi_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvsubi_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvsubi_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvsubi_du' must be a constant integer}} + return res; +} + +v32i8 xvmaxi_b(v32i8 _1, int var) { + v32i8 res = __builtin_lasx_xvmaxi_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvmaxi_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvmaxi_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_b' must be a constant integer}} + return res; +} + +v16i16 xvmaxi_h(v16i16 _1, int var) { + v16i16 res = __builtin_lasx_xvmaxi_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvmaxi_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvmaxi_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_h' must be a constant integer}} + return res; +} + +v8i32 xvmaxi_w(v8i32 _1, int var) { + v8i32 res = __builtin_lasx_xvmaxi_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvmaxi_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvmaxi_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_w' must be a constant integer}} + return res; +} + +v4i64 xvmaxi_d(v4i64 _1, int var) { + v4i64 res = __builtin_lasx_xvmaxi_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvmaxi_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvmaxi_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_d' must be a constant integer}} + return res; +} + +v32u8 xvmaxi_bu(v32u8 _1, int var) { + v32u8 res = __builtin_lasx_xvmaxi_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvmaxi_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvmaxi_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_bu' must be a constant integer}} + return res; +} + +v16u16 xvmaxi_hu(v16u16 _1, int var) { + v16u16 res = __builtin_lasx_xvmaxi_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvmaxi_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvmaxi_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_hu' must be a constant integer}} + return res; +} + +v8u32 xvmaxi_wu(v8u32 _1, int var) { + v8u32 res = __builtin_lasx_xvmaxi_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvmaxi_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range 
[0, 31]}} + res |= __builtin_lasx_xvmaxi_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_wu' must be a constant integer}} + return res; +} + +v4u64 xvmaxi_du(v4u64 _1, int var) { + v4u64 res = __builtin_lasx_xvmaxi_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvmaxi_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvmaxi_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_du' must be a constant integer}} + return res; +} + +v32i8 xvmini_b(v32i8 _1, int var) { + v32i8 res = __builtin_lasx_xvmini_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvmini_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvmini_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_b' must be a constant integer}} + return res; +} + +v16i16 xvmini_h(v16i16 _1, int var) { + v16i16 res = __builtin_lasx_xvmini_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvmini_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvmini_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_h' must be a constant integer}} + return res; +} + +v8i32 xvmini_w(v8i32 _1, int var) { + v8i32 res = __builtin_lasx_xvmini_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvmini_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvmini_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_w' must be a constant integer}} + return res; +} + +v4i64 xvmini_d(v4i64 _1, int var) { + v4i64 res = __builtin_lasx_xvmini_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvmini_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvmini_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_d' must be a constant integer}} + return res; +} + +v32u8 xvmini_bu(v32u8 _1, int var) { + v32u8 res = __builtin_lasx_xvmini_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvmini_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvmini_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_bu' must be a constant integer}} + return res; +} + +v16u16 xvmini_hu(v16u16 _1, int var) { + v16u16 res = __builtin_lasx_xvmini_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvmini_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvmini_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_hu' must be a constant integer}} + return res; +} + +v8u32 xvmini_wu(v8u32 _1, int var) { + v8u32 res = __builtin_lasx_xvmini_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvmini_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvmini_wu(_1, var); // expected-error
{{argument to '__builtin_lasx_xvmini_wu' must be a constant integer}} + return res; +} + +v4u64 xvmini_du(v4u64 _1, int var) { + v4u64 res = __builtin_lasx_xvmini_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvmini_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvmini_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_du' must be a constant integer}} + return res; +} + +v32i8 xvseqi_b(v32i8 _1, int var) { + v32i8 res = __builtin_lasx_xvseqi_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvseqi_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvseqi_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvseqi_b' must be a constant integer}} + return res; +} + +v16i16 xvseqi_h(v16i16 _1, int var) { + v16i16 res = __builtin_lasx_xvseqi_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvseqi_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvseqi_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvseqi_h' must be a constant integer}} + return res; +} + +v8i32 xvseqi_w(v8i32 _1, int var) { + v8i32 res = __builtin_lasx_xvseqi_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvseqi_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvseqi_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvseqi_w' must be a constant integer}} + return res; +} + +v4i64 xvseqi_d(v4i64 _1, int var) { + v4i64 res = __builtin_lasx_xvseqi_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvseqi_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvseqi_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvseqi_d' must be a constant integer}} + return res; +} + +v32i8 xvslti_b(v32i8 _1, int var) { + v32i8 res = __builtin_lasx_xvslti_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvslti_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvslti_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_b' must be a constant integer}} + return res; +} + +v16i16 xvslti_h(v16i16 _1, int var) { + v16i16 res = __builtin_lasx_xvslti_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvslti_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvslti_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_h' must be a constant integer}} + return res; +} + +v8i32 xvslti_w(v8i32 _1, int var) { + v8i32 res = __builtin_lasx_xvslti_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvslti_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvslti_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_w' must be a constant integer}} + return res; +} + 
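+// Illustrative sketch, not part of the generated test set: the functions in
+// this file exercise Clang's immediate-operand checking, so a call is only
+// well-formed when the immediate is a compile-time constant inside the
+// documented range. The hypothetical helper below reuses the v32i8 typedef
+// declared at the top of this file; with in-range si5 constants like these,
+// no diagnostic is expected under -verify.
+v32i8 xvslti_b_in_range(v32i8 _1) {
+  v32i8 res = __builtin_lasx_xvslti_b(_1, -16); // lowest accepted immediate
+  res |= __builtin_lasx_xvslti_b(_1, 15);       // highest accepted immediate
+  return res;
+}
+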
+v4i64 xvslti_d(v4i64 _1, int var) { + v4i64 res = __builtin_lasx_xvslti_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvslti_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvslti_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_d' must be a constant integer}} + return res; +} + +v32i8 xvslti_bu(v32u8 _1, int var) { + v32i8 res = __builtin_lasx_xvslti_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvslti_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvslti_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_bu' must be a constant integer}} + return res; +} + +v16i16 xvslti_hu(v16u16 _1, int var) { + v16i16 res = __builtin_lasx_xvslti_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvslti_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvslti_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_hu' must be a constant integer}} + return res; +} + +v8i32 xvslti_wu(v8u32 _1, int var) { + v8i32 res = __builtin_lasx_xvslti_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvslti_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvslti_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_wu' must be a constant integer}} + return res; +} + +v4i64 xvslti_du(v4u64 _1, int var) { + v4i64 res = __builtin_lasx_xvslti_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvslti_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvslti_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_du' must be a constant integer}} + return res; +} + +v32i8 xvslei_b(v32i8 _1, int var) { + v32i8 res = __builtin_lasx_xvslei_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvslei_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvslei_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_b' must be a constant integer}} + return res; +} + +v16i16 xvslei_h(v16i16 _1, int var) { + v16i16 res = __builtin_lasx_xvslei_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvslei_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvslei_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_h' must be a constant integer}} + return res; +} + +v8i32 xvslei_w(v8i32 _1, int var) { + v8i32 res = __builtin_lasx_xvslei_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvslei_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvslei_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_w' must be a constant integer}} + return res; +} + +v4i64 xvslei_d(v4i64 _1, int var) { + v4i64 res = 
__builtin_lasx_xvslei_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvslei_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lasx_xvslei_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_d' must be a constant integer}} + return res; +} + +v32i8 xvslei_bu(v32u8 _1, int var) { + v32i8 res = __builtin_lasx_xvslei_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvslei_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvslei_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_bu' must be a constant integer}} + return res; +} + +v16i16 xvslei_hu(v16u16 _1, int var) { + v16i16 res = __builtin_lasx_xvslei_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvslei_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvslei_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_hu' must be a constant integer}} + return res; +} + +v8i32 xvslei_wu(v8u32 _1, int var) { + v8i32 res = __builtin_lasx_xvslei_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvslei_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvslei_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_wu' must be a constant integer}} + return res; +} + +v4i64 xvslei_du(v4u64 _1, int var) { + v4i64 res = __builtin_lasx_xvslei_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvslei_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvslei_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_du' must be a constant integer}} + return res; +} + +v32i8 xvsat_b(v32i8 _1, int var) { + v32i8 res = __builtin_lasx_xvsat_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvsat_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvsat_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_b' must be a constant integer}} + return res; +} + +v16i16 xvsat_h(v16i16 _1, int var) { + v16i16 res = __builtin_lasx_xvsat_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvsat_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvsat_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_h' must be a constant integer}} + return res; +} + +v8i32 xvsat_w(v8i32 _1, int var) { + v8i32 res = __builtin_lasx_xvsat_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvsat_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvsat_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_w' must be a constant integer}} + return res; +} + +v4i64 xvsat_d(v4i64 _1, int var) { + v4i64 res = __builtin_lasx_xvsat_d(_1, -1); // expected-error {{argument value 4294967295 is 
outside the valid range [0, 63]}} + res |= __builtin_lasx_xvsat_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvsat_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_d' must be a constant integer}} + return res; +} + +v32u8 xvsat_bu(v32u8 _1, int var) { + v32u8 res = __builtin_lasx_xvsat_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvsat_bu(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvsat_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_bu' must be a constant integer}} + return res; +} + +v16u16 xvsat_hu(v16u16 _1, int var) { + v16u16 res = __builtin_lasx_xvsat_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvsat_hu(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvsat_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_hu' must be a constant integer}} + return res; +} + +v8u32 xvsat_wu(v8u32 _1, int var) { + v8u32 res = __builtin_lasx_xvsat_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvsat_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvsat_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_wu' must be a constant integer}} + return res; +} + +v4u64 xvsat_du(v4u64 _1, int var) { + v4u64 res = __builtin_lasx_xvsat_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvsat_du(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvsat_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_du' must be a constant integer}} + return res; +} + +v32i8 xvrepl128vei_b(v32i8 _1, int var) { + v32i8 res = __builtin_lasx_xvrepl128vei_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvrepl128vei_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvrepl128vei_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvrepl128vei_b' must be a constant integer}} + return res; +} + +v16i16 xvrepl128vei_h(v16i16 _1, int var) { + v16i16 res = __builtin_lasx_xvrepl128vei_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvrepl128vei_h(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvrepl128vei_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvrepl128vei_h' must be a constant integer}} + return res; +} + +v8i32 xvrepl128vei_w(v8i32 _1, int var) { + v8i32 res = __builtin_lasx_xvrepl128vei_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} + res |= __builtin_lasx_xvrepl128vei_w(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + res |= __builtin_lasx_xvrepl128vei_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvrepl128vei_w' must be a constant integer}} + return res; +} + +v4i64 xvrepl128vei_d(v4i64 _1, int var) { + v4i64 res = __builtin_lasx_xvrepl128vei_d(_1, -1); // expected-error {{argument value 4294967295 
is outside the valid range [0, 1]}} + res |= __builtin_lasx_xvrepl128vei_d(_1, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} + res |= __builtin_lasx_xvrepl128vei_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvrepl128vei_d' must be a constant integer}} + return res; +} + +v32u8 xvandi_b(v32u8 _1, int var) { + v32u8 res = __builtin_lasx_xvandi_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __builtin_lasx_xvandi_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __builtin_lasx_xvandi_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvandi_b' must be a constant integer}} + return res; +} + +v32u8 xvori_b(v32u8 _1, int var) { + v32u8 res = __builtin_lasx_xvori_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __builtin_lasx_xvori_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __builtin_lasx_xvori_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvori_b' must be a constant integer}} + return res; +} + +v32u8 xvnori_b(v32u8 _1, int var) { + v32u8 res = __builtin_lasx_xvnori_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __builtin_lasx_xvnori_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __builtin_lasx_xvnori_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvnori_b' must be a constant integer}} + return res; +} + +v32u8 xvxori_b(v32u8 _1, int var) { + v32u8 res = __builtin_lasx_xvxori_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __builtin_lasx_xvxori_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __builtin_lasx_xvxori_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvxori_b' must be a constant integer}} + return res; +} + +v32u8 xvbitseli_b(v32u8 _1, v32u8 _2, int var) { + v32u8 res = __builtin_lasx_xvbitseli_b(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __builtin_lasx_xvbitseli_b(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __builtin_lasx_xvbitseli_b(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvbitseli_b' must be a constant integer}} + return res; +} + +v32i8 xvshuf4i_b(v32i8 _1, int var) { + v32i8 res = __builtin_lasx_xvshuf4i_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __builtin_lasx_xvshuf4i_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __builtin_lasx_xvshuf4i_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvshuf4i_b' must be a constant integer}} + return res; +} + +v16i16 xvshuf4i_h(v16i16 _1, int var) { + v16i16 res = __builtin_lasx_xvshuf4i_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __builtin_lasx_xvshuf4i_h(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __builtin_lasx_xvshuf4i_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvshuf4i_h' must be a constant integer}} + return res; +} + +v8i32 xvshuf4i_w(v8i32 _1, int var) { + v8i32 res = __builtin_lasx_xvshuf4i_w(_1, -1); // expected-error {{argument value 
4294967295 is outside the valid range [0, 255]}} + res |= __builtin_lasx_xvshuf4i_w(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __builtin_lasx_xvshuf4i_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvshuf4i_w' must be a constant integer}} + return res; +} + +v4i64 xvshuf4i_d(v4i64 _1, v4i64 _2, int var) { + v4i64 res = __builtin_lasx_xvshuf4i_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __builtin_lasx_xvshuf4i_d(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __builtin_lasx_xvshuf4i_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvshuf4i_d' must be a constant integer}} + return res; +} + +v8i32 xvpermi_w(v8i32 _1, v8i32 _2, int var) { + v8i32 res = __builtin_lasx_xvpermi_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __builtin_lasx_xvpermi_w(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __builtin_lasx_xvpermi_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvpermi_w' must be a constant integer}} + return res; +} + +v4i64 xvpermi_d(v4i64 _1, int var) { + v4i64 res = __builtin_lasx_xvpermi_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __builtin_lasx_xvpermi_d(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __builtin_lasx_xvpermi_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvpermi_d' must be a constant integer}} + return res; +} + +v32i8 xvpermi_q(v32i8 _1, v32i8 _2, int var) { + v32i8 res = __builtin_lasx_xvpermi_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __builtin_lasx_xvpermi_q(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __builtin_lasx_xvpermi_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvpermi_q' must be a constant integer}} + return res; +} + +v16i16 xvsllwil_h_b(v32i8 _1, int var) { + v16i16 res = __builtin_lasx_xvsllwil_h_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvsllwil_h_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvsllwil_h_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvsllwil_h_b' must be a constant integer}} + return res; +} + +v8i32 xvsllwil_w_h(v16i16 _1, int var) { + v8i32 res = __builtin_lasx_xvsllwil_w_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvsllwil_w_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvsllwil_w_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvsllwil_w_h' must be a constant integer}} + return res; +} + +v4i64 xvsllwil_d_w(v8i32 _1, int var) { + v4i64 res = __builtin_lasx_xvsllwil_d_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvsllwil_d_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvsllwil_d_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvsllwil_d_w' must be a constant integer}} + return res; +} + +v16u16 xvsllwil_hu_bu(v32u8 _1, 
int var) { + v16u16 res = __builtin_lasx_xvsllwil_hu_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvsllwil_hu_bu(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvsllwil_hu_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsllwil_hu_bu' must be a constant integer}} + return res; +} + +v8u32 xvsllwil_wu_hu(v16u16 _1, int var) { + v8u32 res = __builtin_lasx_xvsllwil_wu_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvsllwil_wu_hu(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvsllwil_wu_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsllwil_wu_hu' must be a constant integer}} + return res; +} + +v4u64 xvsllwil_du_wu(v8u32 _1, int var) { + v4u64 res = __builtin_lasx_xvsllwil_du_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvsllwil_du_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvsllwil_du_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsllwil_du_wu' must be a constant integer}} + return res; +} + +v32i8 xvfrstpi_b(v32i8 _1, v32i8 _2, int var) { + v32i8 res = __builtin_lasx_xvfrstpi_b(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvfrstpi_b(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvfrstpi_b(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvfrstpi_b' must be a constant integer}} + return res; +} + +v16i16 xvfrstpi_h(v16i16 _1, v16i16 _2, int var) { + v16i16 res = __builtin_lasx_xvfrstpi_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvfrstpi_h(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvfrstpi_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvfrstpi_h' must be a constant integer}} + return res; +} + +v32i8 xvbsrl_v(v32i8 _1, int var) { + v32i8 res = __builtin_lasx_xvbsrl_v(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvbsrl_v(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvbsrl_v(_1, var); // expected-error {{argument to '__builtin_lasx_xvbsrl_v' must be a constant integer}} + return res; +} + +v32i8 xvbsll_v(v32i8 _1, int var) { + v32i8 res = __builtin_lasx_xvbsll_v(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvbsll_v(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvbsll_v(_1, var); // expected-error {{argument to '__builtin_lasx_xvbsll_v' must be a constant integer}} + return res; +} + +v32i8 xvextrins_b(v32i8 _1, v32i8 _2, int var) { + v32i8 res = __builtin_lasx_xvextrins_b(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __builtin_lasx_xvextrins_b(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __builtin_lasx_xvextrins_b(_1, _2, var); // expected-error {{argument to 
'__builtin_lasx_xvextrins_b' must be a constant integer}}
+  return res;
+}
+
+v16i16 xvextrins_h(v16i16 _1, v16i16 _2, int var) {
+  v16i16 res = __builtin_lasx_xvextrins_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}}
+  res |= __builtin_lasx_xvextrins_h(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}}
+  res |= __builtin_lasx_xvextrins_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvextrins_h' must be a constant integer}}
+  return res;
+}
+
+v8i32 xvextrins_w(v8i32 _1, v8i32 _2, int var) {
+  v8i32 res = __builtin_lasx_xvextrins_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}}
+  res |= __builtin_lasx_xvextrins_w(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}}
+  res |= __builtin_lasx_xvextrins_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvextrins_w' must be a constant integer}}
+  return res;
+}
+
+v4i64 xvextrins_d(v4i64 _1, v4i64 _2, int var) {
+  v4i64 res = __builtin_lasx_xvextrins_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}}
+  res |= __builtin_lasx_xvextrins_d(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}}
+  res |= __builtin_lasx_xvextrins_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvextrins_d' must be a constant integer}}
+  return res;
+}
+
+v32i8 xvld(void *_1, int var) {
+  v32i8 res = __builtin_lasx_xvld(_1, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}}
+  res |= __builtin_lasx_xvld(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2047]}}
+  res |= __builtin_lasx_xvld(_1, var); // expected-error {{argument to '__builtin_lasx_xvld' must be a constant integer}}
+  return res;
+}
+
+void xvst(v32i8 _1, void *_2, int var) {
+  __builtin_lasx_xvst(_1, _2, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}}
+  __builtin_lasx_xvst(_1, _2, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2047]}}
+  __builtin_lasx_xvst(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvst' must be a constant integer}}
+}
+
+void xvstelm_b(v32i8 _1, void * _2, int var) {
+  __builtin_lasx_xvstelm_b(_1, _2, -129, 1); // expected-error {{argument value -129 is outside the valid range [-128, 127]}}
+  __builtin_lasx_xvstelm_b(_1, _2, 128, 1); // expected-error {{argument value 128 is outside the valid range [-128, 127]}}
+  __builtin_lasx_xvstelm_b(_1, _2, var, 1); // expected-error {{argument to '__builtin_lasx_xvstelm_b' must be a constant integer}}
+}
+
+void xvstelm_h(v16i16 _1, void * _2, int var) {
+  __builtin_lasx_xvstelm_h(_1, _2, -258, 1); // expected-error {{argument value -258 is outside the valid range [-256, 254]}}
+  __builtin_lasx_xvstelm_h(_1, _2, 256, 1); // expected-error {{argument value 256 is outside the valid range [-256, 254]}}
+  __builtin_lasx_xvstelm_h(_1, _2, var, 1); // expected-error {{argument to '__builtin_lasx_xvstelm_h' must be a constant integer}}
+}
+
+void xvstelm_w(v8i32 _1, void * _2, int var) {
+  __builtin_lasx_xvstelm_w(_1, _2, -516, 1); // expected-error {{argument value -516 is outside the valid range [-512, 508]}}
+  __builtin_lasx_xvstelm_w(_1, _2, 512, 1); // expected-error {{argument value 512 is outside the valid range [-512, 508]}}
+  __builtin_lasx_xvstelm_w(_1, _2, var, 1); // expected-error {{argument to '__builtin_lasx_xvstelm_w' must be a constant integer}}
+}
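+
+// The offset immediate of the xvstelm.{b,h,w,d} builtins is an si8 scaled by
+// the element size, so the accepted offsets are the multiples of 1, 2, 4 and
+// 8 within [-128, 127], [-256, 254], [-512, 508] and [-1024, 1016]
+// respectively; the trailing argument selects the element and is range-checked
+// separately (see the *_idx cases below).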
+
+void xvstelm_d(v4i64 _1, void * _2, int var) {
+  __builtin_lasx_xvstelm_d(_1, _2, -1032, 1); // expected-error {{argument value -1032 is outside the valid range [-1024, 1016]}}
+  __builtin_lasx_xvstelm_d(_1, _2, 1024, 1); // expected-error {{argument value 1024 is outside the valid range [-1024, 1016]}}
+  __builtin_lasx_xvstelm_d(_1, _2, var, 1); // expected-error {{argument to '__builtin_lasx_xvstelm_d' must be a constant integer}}
+}
+
+void xvstelm_b_idx(v32i8 _1, void * _2, int var) {
+  __builtin_lasx_xvstelm_b(_1, _2, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
+  __builtin_lasx_xvstelm_b(_1, _2, 1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
+  __builtin_lasx_xvstelm_b(_1, _2, 1, var); // expected-error {{argument to '__builtin_lasx_xvstelm_b' must be a constant integer}}
+}
+
+void xvstelm_h_idx(v16i16 _1, void * _2, int var) {
+  __builtin_lasx_xvstelm_h(_1, _2, 2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
+  __builtin_lasx_xvstelm_h(_1, _2, 2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+  __builtin_lasx_xvstelm_h(_1, _2, 2, var); // expected-error {{argument to '__builtin_lasx_xvstelm_h' must be a constant integer}}
+}
+
+void xvstelm_w_idx(v8i32 _1, void * _2, int var) {
+  __builtin_lasx_xvstelm_w(_1, _2, 4, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}}
+  __builtin_lasx_xvstelm_w(_1, _2, 4, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
+  __builtin_lasx_xvstelm_w(_1, _2, 4, var); // expected-error {{argument to '__builtin_lasx_xvstelm_w' must be a constant integer}}
+}
+
+void xvstelm_d_idx(v4i64 _1, void * _2, int var) {
+  __builtin_lasx_xvstelm_d(_1, _2, 8, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}}
+  __builtin_lasx_xvstelm_d(_1, _2, 8, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
+  __builtin_lasx_xvstelm_d(_1, _2, 8, var); // expected-error {{argument to '__builtin_lasx_xvstelm_d' must be a constant integer}}
+}
+
+v8i32 xvinsve0_w(v8i32 _1, v8i32 _2, int var) {
+  v8i32 res = __builtin_lasx_xvinsve0_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}}
+  res |= __builtin_lasx_xvinsve0_w(_1, _2, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
+  res |= __builtin_lasx_xvinsve0_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvinsve0_w' must be a constant integer}}
+  return res;
+}
+
+v4i64 xvinsve0_d(v4i64 _1, v4i64 _2, int var) {
+  v4i64 res = __builtin_lasx_xvinsve0_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}}
+  res |= __builtin_lasx_xvinsve0_d(_1, _2, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
+  res |= __builtin_lasx_xvinsve0_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvinsve0_d' must be a constant integer}}
+  return res;
+}
+
+v8i32 xvpickve_w(v8i32 _1, int var) {
+  v8i32 res = __builtin_lasx_xvpickve_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}}
+  res |= __builtin_lasx_xvpickve_w(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
+  res |= __builtin_lasx_xvpickve_w(_1, var); // expected-error {{argument to
'__builtin_lasx_xvpickve_w' must be a constant integer}} + return res; +} + +v4i64 xvpickve_d(v4i64 _1, int var) { + v4i64 res = __builtin_lasx_xvpickve_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} + res |= __builtin_lasx_xvpickve_d(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + res |= __builtin_lasx_xvpickve_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve_d' must be a constant integer}} + return res; +} + +v4i64 xvldi(int var) { + v4i64 res = __builtin_lasx_xvldi(-4097); // expected-error {{argument value -4097 is outside the valid range [-4096, 4095]}} + res |= __builtin_lasx_xvldi(4096); // expected-error {{argument value 4096 is outside the valid range [-4096, 4095]}} + res |= __builtin_lasx_xvldi(var); // expected-error {{argument to '__builtin_lasx_xvldi' must be a constant integer}} + return res; +} + +v8i32 xvinsgr2vr_w(v8i32 _1, int var) { + v8i32 res = __builtin_lasx_xvinsgr2vr_w(_1, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvinsgr2vr_w(_1, 1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvinsgr2vr_w(_1, 1, var); // expected-error {{argument to '__builtin_lasx_xvinsgr2vr_w' must be a constant integer}} + return res; +} + +v4i64 xvinsgr2vr_d(v4i64 _1, int var) { + v4i64 res = __builtin_lasx_xvinsgr2vr_d(_1, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} + res |= __builtin_lasx_xvinsgr2vr_d(_1, 1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + res |= __builtin_lasx_xvinsgr2vr_d(_1, 1, var); // expected-error {{argument to '__builtin_lasx_xvinsgr2vr_d' must be a constant integer}} + return res; +} + +v32i8 xvldrepl_b(void *_1, int var) { + v32i8 res = __builtin_lasx_xvldrepl_b(_1, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} + res |= __builtin_lasx_xvldrepl_b(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2047]}} + res |= __builtin_lasx_xvldrepl_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvldrepl_b' must be a constant integer}} + return res; +} + +v16i16 xvldrepl_h(void *_1, int var) { + v16i16 res = __builtin_lasx_xvldrepl_h(_1, -2050); // expected-error {{argument value -2050 is outside the valid range [-2048, 2046]}} + res |= __builtin_lasx_xvldrepl_h(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2046]}} + res |= __builtin_lasx_xvldrepl_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvldrepl_h' must be a constant integer}} + return res; +} + +v8i32 xvldrepl_w(void *_1, int var) { + v8i32 res = __builtin_lasx_xvldrepl_w(_1, -2052); // expected-error {{argument value -2052 is outside the valid range [-2048, 2044]}} + res |= __builtin_lasx_xvldrepl_w(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2044]}} + res |= __builtin_lasx_xvldrepl_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvldrepl_w' must be a constant integer}} + return res; +} + +v4i64 xvldrepl_d(void *_1, int var) { + v4i64 res = __builtin_lasx_xvldrepl_d(_1, -2056); // expected-error {{argument value -2056 is outside the valid range [-2048, 2040]}} + res |= __builtin_lasx_xvldrepl_d(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2040]}} + res |= 
__builtin_lasx_xvldrepl_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvldrepl_d' must be a constant integer}} + return res; +} + +int xvpickve2gr_w(v8i32 _1, int var) { + int res = __builtin_lasx_xvpickve2gr_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvpickve2gr_w(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvpickve2gr_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve2gr_w' must be a constant integer}} + return res; +} + +unsigned int xvpickve2gr_wu(v8i32 _1, int var) { + unsigned int res = __builtin_lasx_xvpickve2gr_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvpickve2gr_wu(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvpickve2gr_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve2gr_wu' must be a constant integer}} + return res; +} + +long xvpickve2gr_d(v4i64 _1, int var) { + long res = __builtin_lasx_xvpickve2gr_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} + res |= __builtin_lasx_xvpickve2gr_d(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + res |= __builtin_lasx_xvpickve2gr_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve2gr_d' must be a constant integer}} + return res; +} + +unsigned long int xvpickve2gr_du(v4i64 _1, int var) { + unsigned long int res = __builtin_lasx_xvpickve2gr_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} + res |= __builtin_lasx_xvpickve2gr_du(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + res |= __builtin_lasx_xvpickve2gr_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve2gr_du' must be a constant integer}} + return res; +} + +v32i8 xvrotri_b(v32i8 _1, int var) { + v32i8 res = __builtin_lasx_xvrotri_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvrotri_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lasx_xvrotri_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvrotri_b' must be a constant integer}} + return res; +} + +v16i16 xvrotri_h(v16i16 _1, int var) { + v16i16 res = __builtin_lasx_xvrotri_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvrotri_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvrotri_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvrotri_h' must be a constant integer}} + return res; +} + +v8i32 xvrotri_w(v8i32 _1, int var) { + v8i32 res = __builtin_lasx_xvrotri_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvrotri_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvrotri_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvrotri_w' must be a constant integer}} + return res; +} + +v4i64 xvrotri_d(v4i64 _1, int var) { + v4i64 res = __builtin_lasx_xvrotri_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvrotri_d(_1, 64); // expected-error 
{{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvrotri_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvrotri_d' must be a constant integer}} + return res; +} + +v32i8 xvsrlni_b_h(v32i8 _1, v32i8 _2, int var) { + v32i8 res = __builtin_lasx_xvsrlni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvsrlni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvsrlni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlni_b_h' must be a constant integer}} + return res; +} + +v16i16 xvsrlni_h_w(v16i16 _1, v16i16 _2, int var) { + v16i16 res = __builtin_lasx_xvsrlni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvsrlni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvsrlni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlni_h_w' must be a constant integer}} + return res; +} + +v8i32 xvsrlni_w_d(v8i32 _1, v8i32 _2, int var) { + v8i32 res = __builtin_lasx_xvsrlni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvsrlni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvsrlni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlni_w_d' must be a constant integer}} + return res; +} + +v4i64 xvsrlni_d_q(v4i64 _1, v4i64 _2, int var) { + v4i64 res = __builtin_lasx_xvsrlni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} + res |= __builtin_lasx_xvsrlni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + res |= __builtin_lasx_xvsrlni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlni_d_q' must be a constant integer}} + return res; +} + +v32i8 xvsrlrni_b_h(v32i8 _1, v32i8 _2, int var) { + v32i8 res = __builtin_lasx_xvsrlrni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvsrlrni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvsrlrni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlrni_b_h' must be a constant integer}} + return res; +} + +v16i16 xvsrlrni_h_w(v16i16 _1, v16i16 _2, int var) { + v16i16 res = __builtin_lasx_xvsrlrni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvsrlrni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvsrlrni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlrni_h_w' must be a constant integer}} + return res; +} + +v8i32 xvsrlrni_w_d(v8i32 _1, v8i32 _2, int var) { + v8i32 res = __builtin_lasx_xvsrlrni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvsrlrni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvsrlrni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlrni_w_d' must be a constant integer}} + return res; +} + +v4i64 xvsrlrni_d_q(v4i64 _1, 
v4i64 _2, int var) { + v4i64 res = __builtin_lasx_xvsrlrni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} + res |= __builtin_lasx_xvsrlrni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + res |= __builtin_lasx_xvsrlrni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlrni_d_q' must be a constant integer}} + return res; +} + +v32i8 xvssrlni_b_h(v32i8 _1, v32i8 _2, int var) { + v32i8 res = __builtin_lasx_xvssrlni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvssrlni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvssrlni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_b_h' must be a constant integer}} + return res; +} + +v16i16 xvssrlni_h_w(v16i16 _1, v16i16 _2, int var) { + v16i16 res = __builtin_lasx_xvssrlni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvssrlni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvssrlni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_h_w' must be a constant integer}} + return res; +} + +v8i32 xvssrlni_w_d(v8i32 _1, v8i32 _2, int var) { + v8i32 res = __builtin_lasx_xvssrlni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvssrlni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvssrlni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_w_d' must be a constant integer}} + return res; +} + +v4i64 xvssrlni_d_q(v4i64 _1, v4i64 _2, int var) { + v4i64 res = __builtin_lasx_xvssrlni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} + res |= __builtin_lasx_xvssrlni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + res |= __builtin_lasx_xvssrlni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_d_q' must be a constant integer}} + return res; +} + +v32u8 xvssrlni_bu_h(v32u8 _1, v32i8 _2, int var) { + v32u8 res = __builtin_lasx_xvssrlni_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvssrlni_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvssrlni_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_bu_h' must be a constant integer}} + return res; +} + +v16u16 xvssrlni_hu_w(v16u16 _1, v16i16 _2, int var) { + v16u16 res = __builtin_lasx_xvssrlni_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvssrlni_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvssrlni_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_hu_w' must be a constant integer}} + return res; +} + +v8u32 xvssrlni_wu_d(v8u32 _1, v8i32 _2, int var) { + v8u32 res = __builtin_lasx_xvssrlni_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvssrlni_wu_d(_1, 
_2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvssrlni_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_wu_d' must be a constant integer}} + return res; +} + +v4u64 xvssrlni_du_q(v4u64 _1, v4i64 _2, int var) { + v4u64 res = __builtin_lasx_xvssrlni_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} + res |= __builtin_lasx_xvssrlni_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + res |= __builtin_lasx_xvssrlni_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_du_q' must be a constant integer}} + return res; +} + +v32i8 xvssrlrni_b_h(v32i8 _1, v32i8 _2, int var) { + v32i8 res = __builtin_lasx_xvssrlrni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvssrlrni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvssrlrni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_b_h' must be a constant integer}} + return res; +} + +v16i16 xvssrlrni_h_w(v16i16 _1, v16i16 _2, int var) { + v16i16 res = __builtin_lasx_xvssrlrni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvssrlrni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvssrlrni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_h_w' must be a constant integer}} + return res; +} + +v8i32 xvssrlrni_w_d(v8i32 _1, v8i32 _2, int var) { + v8i32 res = __builtin_lasx_xvssrlrni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvssrlrni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvssrlrni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_w_d' must be a constant integer}} + return res; +} + +v4i64 xvssrlrni_d_q(v4i64 _1, v4i64 _2, int var) { + v4i64 res = __builtin_lasx_xvssrlrni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} + res |= __builtin_lasx_xvssrlrni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + res |= __builtin_lasx_xvssrlrni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_d_q' must be a constant integer}} + return res; +} + +v32u8 xvssrlrni_bu_h(v32u8 _1, v32i8 _2, int var) { + v32u8 res = __builtin_lasx_xvssrlrni_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvssrlrni_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvssrlrni_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_bu_h' must be a constant integer}} + return res; +} + +v16u16 xvssrlrni_hu_w(v16u16 _1, v16i16 _2, int var) { + v16u16 res = __builtin_lasx_xvssrlrni_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvssrlrni_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvssrlrni_hu_w(_1, _2, var); // expected-error {{argument to 
'__builtin_lasx_xvssrlrni_hu_w' must be a constant integer}} + return res; +} + +v8u32 xvssrlrni_wu_d(v8u32 _1, v8i32 _2, int var) { + v8u32 res = __builtin_lasx_xvssrlrni_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvssrlrni_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvssrlrni_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_wu_d' must be a constant integer}} + return res; +} + +v4u64 xvssrlrni_du_q(v4u64 _1, v4i64 _2, int var) { + v4u64 res = __builtin_lasx_xvssrlrni_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} + res |= __builtin_lasx_xvssrlrni_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + res |= __builtin_lasx_xvssrlrni_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_du_q' must be a constant integer}} + return res; +} + +v32i8 xvsrani_b_h(v32i8 _1, v32i8 _2, int var) { + v32i8 res = __builtin_lasx_xvsrani_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvsrani_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvsrani_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrani_b_h' must be a constant integer}} + return res; +} + +v16i16 xvsrani_h_w(v16i16 _1, v16i16 _2, int var) { + v16i16 res = __builtin_lasx_xvsrani_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvsrani_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvsrani_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrani_h_w' must be a constant integer}} + return res; +} + +v8i32 xvsrani_w_d(v8i32 _1, v8i32 _2, int var) { + v8i32 res = __builtin_lasx_xvsrani_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvsrani_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvsrani_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrani_w_d' must be a constant integer}} + return res; +} + +v4i64 xvsrani_d_q(v4i64 _1, v4i64 _2, int var) { + v4i64 res = __builtin_lasx_xvsrani_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} + res |= __builtin_lasx_xvsrani_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + res |= __builtin_lasx_xvsrani_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrani_d_q' must be a constant integer}} + return res; +} + +v32i8 xvsrarni_b_h(v32i8 _1, v32i8 _2, int var) { + v32i8 res = __builtin_lasx_xvsrarni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvsrarni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvsrarni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrarni_b_h' must be a constant integer}} + return res; +} + +v16i16 xvsrarni_h_w(v16i16 _1, v16i16 _2, int var) { + v16i16 res = __builtin_lasx_xvsrarni_h_w(_1, _2, -1); // expected-error 
{{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvsrarni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvsrarni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrarni_h_w' must be a constant integer}} + return res; +} + +v8i32 xvsrarni_w_d(v8i32 _1, v8i32 _2, int var) { + v8i32 res = __builtin_lasx_xvsrarni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvsrarni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvsrarni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrarni_w_d' must be a constant integer}} + return res; +} + +v4i64 xvsrarni_d_q(v4i64 _1, v4i64 _2, int var) { + v4i64 res = __builtin_lasx_xvsrarni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} + res |= __builtin_lasx_xvsrarni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + res |= __builtin_lasx_xvsrarni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrarni_d_q' must be a constant integer}} + return res; +} + +v32i8 xvssrani_b_h(v32i8 _1, v32i8 _2, int var) { + v32i8 res = __builtin_lasx_xvssrani_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvssrani_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvssrani_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_b_h' must be a constant integer}} + return res; +} + +v16i16 xvssrani_h_w(v16i16 _1, v16i16 _2, int var) { + v16i16 res = __builtin_lasx_xvssrani_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvssrani_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvssrani_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_h_w' must be a constant integer}} + return res; +} + +v8i32 xvssrani_w_d(v8i32 _1, v8i32 _2, int var) { + v8i32 res = __builtin_lasx_xvssrani_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvssrani_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvssrani_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_w_d' must be a constant integer}} + return res; +} + +v4i64 xvssrani_d_q(v4i64 _1, v4i64 _2, int var) { + v4i64 res = __builtin_lasx_xvssrani_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} + res |= __builtin_lasx_xvssrani_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + res |= __builtin_lasx_xvssrani_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_d_q' must be a constant integer}} + return res; +} + +v32u8 xvssrani_bu_h(v32u8 _1, v32i8 _2, int var) { + v32u8 res = __builtin_lasx_xvssrani_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvssrani_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= 
__builtin_lasx_xvssrani_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_bu_h' must be a constant integer}} + return res; +} + +v16u16 xvssrani_hu_w(v16u16 _1, v16i16 _2, int var) { + v16u16 res = __builtin_lasx_xvssrani_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvssrani_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvssrani_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_hu_w' must be a constant integer}} + return res; +} + +v8u32 xvssrani_wu_d(v8u32 _1, v8i32 _2, int var) { + v8u32 res = __builtin_lasx_xvssrani_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvssrani_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvssrani_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_wu_d' must be a constant integer}} + return res; +} + +v4u64 xvssrani_du_q(v4u64 _1, v4i64 _2, int var) { + v4u64 res = __builtin_lasx_xvssrani_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} + res |= __builtin_lasx_xvssrani_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + res |= __builtin_lasx_xvssrani_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_du_q' must be a constant integer}} + return res; +} + +v32i8 xvssrarni_b_h(v32i8 _1, v32i8 _2, int var) { + v32i8 res = __builtin_lasx_xvssrarni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvssrarni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvssrarni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_b_h' must be a constant integer}} + return res; +} + +v16i16 xvssrarni_h_w(v16i16 _1, v16i16 _2, int var) { + v16i16 res = __builtin_lasx_xvssrarni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvssrarni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvssrarni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_h_w' must be a constant integer}} + return res; +} + +v8i32 xvssrarni_w_d(v8i32 _1, v8i32 _2, int var) { + v8i32 res = __builtin_lasx_xvssrarni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvssrarni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvssrarni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_w_d' must be a constant integer}} + return res; +} + +v4i64 xvssrarni_d_q(v4i64 _1, v4i64 _2, int var) { + v4i64 res = __builtin_lasx_xvssrarni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} + res |= __builtin_lasx_xvssrarni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + res |= __builtin_lasx_xvssrarni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_d_q' must be a constant integer}} + return res; +} + +v32u8 
xvssrarni_bu_h(v32u8 _1, v32i8 _2, int var) { + v32u8 res = __builtin_lasx_xvssrarni_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvssrarni_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lasx_xvssrarni_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_bu_h' must be a constant integer}} + return res; +} + +v16u16 xvssrarni_hu_w(v16u16 _1, v16i16 _2, int var) { + v16u16 res = __builtin_lasx_xvssrarni_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvssrarni_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lasx_xvssrarni_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_hu_w' must be a constant integer}} + return res; +} + +v8u32 xvssrarni_wu_d(v8u32 _1, v8i32 _2, int var) { + v8u32 res = __builtin_lasx_xvssrarni_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvssrarni_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lasx_xvssrarni_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_wu_d' must be a constant integer}} + return res; +} + +v4u64 xvssrarni_du_q(v4u64 _1, v4i64 _2, int var) { + v4u64 res = __builtin_lasx_xvssrarni_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} + res |= __builtin_lasx_xvssrarni_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + res |= __builtin_lasx_xvssrarni_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_du_q' must be a constant integer}} + return res; +} + +v4f64 xvpickve_d_f(v4f64 _1, int var) { + v4f64 res = __builtin_lasx_xvpickve_d_f(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} + res += __builtin_lasx_xvpickve_d_f(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + res += __builtin_lasx_xvpickve_d_f(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve_d_f' must be a constant integer}} + return res; +} + +v8f32 xvpickve_w_f(v8f32 _1, int var) { + v8f32 res = __builtin_lasx_xvpickve_w_f(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res += __builtin_lasx_xvpickve_w_f(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res += __builtin_lasx_xvpickve_w_f(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve_w_f' must be a constant integer}} + return res; +} + +v32i8 xvrepli_b(int var) { + v32i8 res = __builtin_lasx_xvrepli_b(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} + res |= __builtin_lasx_xvrepli_b(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}} + res |= __builtin_lasx_xvrepli_b(var); // expected-error {{argument to '__builtin_lasx_xvrepli_b' must be a constant integer}} + return res; +} + +v4i64 xvrepli_d(int var) { + v4i64 res = __builtin_lasx_xvrepli_d(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} + res |= __builtin_lasx_xvrepli_d(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}} + res |= 
__builtin_lasx_xvrepli_d(var); // expected-error {{argument to '__builtin_lasx_xvrepli_d' must be a constant integer}}
+  return res;
+}
+
+v16i16 xvrepli_h(int var) {
+  v16i16 res = __builtin_lasx_xvrepli_h(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}}
+  res |= __builtin_lasx_xvrepli_h(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}}
+  res |= __builtin_lasx_xvrepli_h(var); // expected-error {{argument to '__builtin_lasx_xvrepli_h' must be a constant integer}}
+  return res;
+}
+
+v8i32 xvrepli_w(int var) {
+  v8i32 res = __builtin_lasx_xvrepli_w(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}}
+  res |= __builtin_lasx_xvrepli_w(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}}
+  res |= __builtin_lasx_xvrepli_w(var); // expected-error {{argument to '__builtin_lasx_xvrepli_w' must be a constant integer}}
+  return res;
+}
diff --git a/clang/test/CodeGen/LoongArch/lasx/builtin.c b/clang/test/CodeGen/LoongArch/lasx/builtin.c
new file mode 100644
index 0000000000000000000000000000000000000000..f52a23a5faea7b2b0e07adf48db3c178d1190c68
--- /dev/null
+++ b/clang/test/CodeGen/LoongArch/lasx/builtin.c
@@ -0,0 +1,6408 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -triple loongarch64 -target-feature +lasx -O2 -emit-llvm %s -o - | FileCheck %s
+
+typedef signed char v32i8 __attribute__((vector_size(32), aligned(32)));
+typedef signed char v32i8_b __attribute__((vector_size(32), aligned(1)));
+typedef unsigned char v32u8 __attribute__((vector_size(32), aligned(32)));
+typedef unsigned char v32u8_b __attribute__((vector_size(32), aligned(1)));
+typedef short v16i16 __attribute__((vector_size(32), aligned(32)));
+typedef short v16i16_h __attribute__((vector_size(32), aligned(2)));
+typedef unsigned short v16u16 __attribute__((vector_size(32), aligned(32)));
+typedef unsigned short v16u16_h __attribute__((vector_size(32), aligned(2)));
+typedef int v8i32 __attribute__((vector_size(32), aligned(32)));
+typedef int v8i32_w __attribute__((vector_size(32), aligned(4)));
+typedef unsigned int v8u32 __attribute__((vector_size(32), aligned(32)));
+typedef unsigned int v8u32_w __attribute__((vector_size(32), aligned(4)));
+typedef long long v4i64 __attribute__((vector_size(32), aligned(32)));
+typedef long long v4i64_d __attribute__((vector_size(32), aligned(8)));
+typedef unsigned long long v4u64 __attribute__((vector_size(32), aligned(32)));
+typedef unsigned long long v4u64_d __attribute__((vector_size(32), aligned(8)));
+typedef float v8f32 __attribute__((vector_size(32), aligned(32)));
+typedef float v8f32_w __attribute__((vector_size(32), aligned(4)));
+typedef double v4f64 __attribute__((vector_size(32), aligned(32)));
+typedef double v4f64_d __attribute__((vector_size(32), aligned(8)));
+
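+// The CHECK lines below were generated by update_cc_test_checks.py; each
+// builtin lowers to the like-named llvm.loongarch.lasx.* intrinsic. As an
+// illustrative sketch (not part of the checked output), given v32i8 values
+// a and b, a typical call pairs vector operands, or a vector operand and a
+// constant immediate:
+//   v32i8 x = __builtin_lasx_xvsll_b(a, b);  // shift each byte of a by the corresponding byte of b
+//   v32i8 y = __builtin_lasx_xvslli_b(a, 1); // shift each byte of a left by immediate 1
+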
+// CHECK-LABEL: @xvsll_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2:![0-9]+]]
+// CHECK-NEXT:    [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsll.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
+// CHECK-NEXT:    store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT:    ret void
+//
+v32i8 xvsll_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsll_b(_1, _2); }
+// CHECK-LABEL: @xvsll_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT:    [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsll.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
+// CHECK-NEXT:    store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT:    ret void
+//
+v16i16 xvsll_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsll_h(_1, _2); }
+// CHECK-LABEL: @xvsll_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT:    [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsll.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
+// CHECK-NEXT:    store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT:    ret void
+//
+v8i32 xvsll_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsll_w(_1, _2); }
+// CHECK-LABEL: @xvsll_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT:    [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsll.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT:    store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT:    ret void
+//
+v4i64 xvsll_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsll_d(_1, _2); }
+// CHECK-LABEL: @xvslli_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslli.b(<32 x i8> [[_1]], i32 1)
+// CHECK-NEXT:    store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT:    ret void
+//
+v32i8 xvslli_b(v32i8 _1) { return __builtin_lasx_xvslli_b(_1, 1); }
+// CHECK-LABEL: @xvslli_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslli.h(<16 x i16> [[_1]], i32 1)
+// CHECK-NEXT:    store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT:    ret void
+//
+v16i16 xvslli_h(v16i16 _1) { return __builtin_lasx_xvslli_h(_1, 1); }
+// CHECK-LABEL: @xvslli_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslli.w(<8 x i32> [[_1]], i32 1)
+// CHECK-NEXT:    store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT:    ret void
+//
+v8i32 xvslli_w(v8i32 _1) { return __builtin_lasx_xvslli_w(_1, 1); }
+// CHECK-LABEL: @xvslli_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslli.d(<4 x i64> [[_1]], i32 1)
+// CHECK-NEXT:    store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT:    ret void
+//
+v4i64 xvslli_d(v4i64 _1) { return __builtin_lasx_xvslli_d(_1, 1); }
+// CHECK-LABEL: @xvsra_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[_1:%.*]] = load <32 x i8>,
ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsra.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvsra_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsra_b(_1, _2); } +// CHECK-LABEL: @xvsra_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsra.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvsra_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsra_h(_1, _2); } +// CHECK-LABEL: @xvsra_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsra.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvsra_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsra_w(_1, _2); } +// CHECK-LABEL: @xvsra_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsra.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvsra_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsra_d(_1, _2); } +// CHECK-LABEL: @xvsrai_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrai.b(<32 x i8> [[_1]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvsrai_b(v32i8 _1) { return __builtin_lasx_xvsrai_b(_1, 1); } +// CHECK-LABEL: @xvsrai_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrai.h(<16 x i16> [[_1]], i32 1) +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvsrai_h(v16i16 _1) { return __builtin_lasx_xvsrai_h(_1, 1); } +// CHECK-LABEL: @xvsrai_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrai.w(<8 x i32> [[_1]], i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvsrai_w(v8i32 _1) { return __builtin_lasx_xvsrai_w(_1, 1); } +// CHECK-LABEL: @xvsrai_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// 
CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrai.d(<4 x i64> [[_1]], i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvsrai_d(v4i64 _1) { return __builtin_lasx_xvsrai_d(_1, 1); } +// CHECK-LABEL: @xvsrar_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrar.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvsrar_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrar_b(_1, _2); } +// CHECK-LABEL: @xvsrar_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrar.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvsrar_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrar_h(_1, _2); } +// CHECK-LABEL: @xvsrar_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrar.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvsrar_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrar_w(_1, _2); } +// CHECK-LABEL: @xvsrar_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrar.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvsrar_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrar_d(_1, _2); } +// CHECK-LABEL: @xvsrari_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrari.b(<32 x i8> [[_1]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvsrari_b(v32i8 _1) { return __builtin_lasx_xvsrari_b(_1, 1); } +// CHECK-LABEL: @xvsrari_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrari.h(<16 x i16> [[_1]], i32 1) +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvsrari_h(v16i16 _1) { return __builtin_lasx_xvsrari_h(_1, 1); } +// CHECK-LABEL: @xvsrari_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail 
call <8 x i32> @llvm.loongarch.lasx.xvsrari.w(<8 x i32> [[_1]], i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvsrari_w(v8i32 _1) { return __builtin_lasx_xvsrari_w(_1, 1); } +// CHECK-LABEL: @xvsrari_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrari.d(<4 x i64> [[_1]], i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvsrari_d(v4i64 _1) { return __builtin_lasx_xvsrari_d(_1, 1); } +// CHECK-LABEL: @xvsrl_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrl.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvsrl_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrl_b(_1, _2); } +// CHECK-LABEL: @xvsrl_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrl.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvsrl_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrl_h(_1, _2); } +// CHECK-LABEL: @xvsrl_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrl.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvsrl_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrl_w(_1, _2); } +// CHECK-LABEL: @xvsrl_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrl.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvsrl_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrl_d(_1, _2); } +// CHECK-LABEL: @xvsrli_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrli.b(<32 x i8> [[_1]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvsrli_b(v32i8 _1) { return __builtin_lasx_xvsrli_b(_1, 1); } +// CHECK-LABEL: @xvsrli_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrli.h(<16 x i16> 
[[_1]], i32 1) +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvsrli_h(v16i16 _1) { return __builtin_lasx_xvsrli_h(_1, 1); } +// CHECK-LABEL: @xvsrli_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrli.w(<8 x i32> [[_1]], i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvsrli_w(v8i32 _1) { return __builtin_lasx_xvsrli_w(_1, 1); } +// CHECK-LABEL: @xvsrli_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrli.d(<4 x i64> [[_1]], i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvsrli_d(v4i64 _1) { return __builtin_lasx_xvsrli_d(_1, 1); } +// CHECK-LABEL: @xvsrlr_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlr.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvsrlr_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrlr_b(_1, _2); } +// CHECK-LABEL: @xvsrlr_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlr.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvsrlr_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrlr_h(_1, _2); } +// CHECK-LABEL: @xvsrlr_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlr.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvsrlr_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrlr_w(_1, _2); } +// CHECK-LABEL: @xvsrlr_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlr.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvsrlr_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrlr_d(_1, _2); } +// CHECK-LABEL: @xvsrlri_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlri.b(<32 x i8> [[_1]], i32 1) +// CHECK-NEXT: store <32 x 
i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvsrlri_b(v32i8 _1) { return __builtin_lasx_xvsrlri_b(_1, 1); } +// CHECK-LABEL: @xvsrlri_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlri.h(<16 x i16> [[_1]], i32 1) +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvsrlri_h(v16i16 _1) { return __builtin_lasx_xvsrlri_h(_1, 1); } +// CHECK-LABEL: @xvsrlri_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlri.w(<8 x i32> [[_1]], i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvsrlri_w(v8i32 _1) { return __builtin_lasx_xvsrlri_w(_1, 1); } +// CHECK-LABEL: @xvsrlri_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlri.d(<4 x i64> [[_1]], i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvsrlri_d(v4i64 _1) { return __builtin_lasx_xvsrlri_d(_1, 1); } +// CHECK-LABEL: @xvbitclr_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitclr.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32u8 xvbitclr_b(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvbitclr_b(_1, _2); } +// CHECK-LABEL: @xvbitclr_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitclr.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16u16 xvbitclr_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvbitclr_h(_1, _2); } +// CHECK-LABEL: @xvbitclr_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitclr.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8u32 xvbitclr_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvbitclr_w(_1, _2); } +// CHECK-LABEL: @xvbitclr_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitclr.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> 
[[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4u64 xvbitclr_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvbitclr_d(_1, _2); } +// CHECK-LABEL: @xvbitclri_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitclri.b(<32 x i8> [[_1]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32u8 xvbitclri_b(v32u8 _1) { return __builtin_lasx_xvbitclri_b(_1, 1); } +// CHECK-LABEL: @xvbitclri_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitclri.h(<16 x i16> [[_1]], i32 1) +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16u16 xvbitclri_h(v16u16 _1) { return __builtin_lasx_xvbitclri_h(_1, 1); } +// CHECK-LABEL: @xvbitclri_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitclri.w(<8 x i32> [[_1]], i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8u32 xvbitclri_w(v8u32 _1) { return __builtin_lasx_xvbitclri_w(_1, 1); } +// CHECK-LABEL: @xvbitclri_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitclri.d(<4 x i64> [[_1]], i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4u64 xvbitclri_d(v4u64 _1) { return __builtin_lasx_xvbitclri_d(_1, 1); } +// CHECK-LABEL: @xvbitset_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitset.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32u8 xvbitset_b(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvbitset_b(_1, _2); } +// CHECK-LABEL: @xvbitset_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitset.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16u16 xvbitset_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvbitset_h(_1, _2); } +// CHECK-LABEL: @xvbitset_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitset.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret 
void +// +v8u32 xvbitset_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvbitset_w(_1, _2); } +// CHECK-LABEL: @xvbitset_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitset.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4u64 xvbitset_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvbitset_d(_1, _2); } +// CHECK-LABEL: @xvbitseti_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitseti.b(<32 x i8> [[_1]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32u8 xvbitseti_b(v32u8 _1) { return __builtin_lasx_xvbitseti_b(_1, 1); } +// CHECK-LABEL: @xvbitseti_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitseti.h(<16 x i16> [[_1]], i32 1) +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16u16 xvbitseti_h(v16u16 _1) { return __builtin_lasx_xvbitseti_h(_1, 1); } +// CHECK-LABEL: @xvbitseti_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitseti.w(<8 x i32> [[_1]], i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8u32 xvbitseti_w(v8u32 _1) { return __builtin_lasx_xvbitseti_w(_1, 1); } +// CHECK-LABEL: @xvbitseti_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitseti.d(<4 x i64> [[_1]], i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4u64 xvbitseti_d(v4u64 _1) { return __builtin_lasx_xvbitseti_d(_1, 1); } +// CHECK-LABEL: @xvbitrev_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitrev.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32u8 xvbitrev_b(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvbitrev_b(_1, _2); } +// CHECK-LABEL: @xvbitrev_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitrev.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16u16 xvbitrev_h(v16u16 _1, v16u16 _2) { return 
__builtin_lasx_xvbitrev_h(_1, _2); } +// CHECK-LABEL: @xvbitrev_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitrev.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8u32 xvbitrev_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvbitrev_w(_1, _2); } +// CHECK-LABEL: @xvbitrev_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitrev.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4u64 xvbitrev_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvbitrev_d(_1, _2); } +// CHECK-LABEL: @xvbitrevi_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitrevi.b(<32 x i8> [[_1]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32u8 xvbitrevi_b(v32u8 _1) { return __builtin_lasx_xvbitrevi_b(_1, 1); } +// CHECK-LABEL: @xvbitrevi_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvbitrevi.h(<16 x i16> [[_1]], i32 1) +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16u16 xvbitrevi_h(v16u16 _1) { return __builtin_lasx_xvbitrevi_h(_1, 1); } +// CHECK-LABEL: @xvbitrevi_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvbitrevi.w(<8 x i32> [[_1]], i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8u32 xvbitrevi_w(v8u32 _1) { return __builtin_lasx_xvbitrevi_w(_1, 1); } +// CHECK-LABEL: @xvbitrevi_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvbitrevi.d(<4 x i64> [[_1]], i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4u64 xvbitrevi_d(v4u64 _1) { return __builtin_lasx_xvbitrevi_d(_1, 1); } +// CHECK-LABEL: @xvadd_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvadd.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvadd_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvadd_b(_1, _2); } +// CHECK-LABEL: @xvadd_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: 
[[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvadd.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvadd_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvadd_h(_1, _2); } +// CHECK-LABEL: @xvadd_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvadd.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvadd_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvadd_w(_1, _2); } +// CHECK-LABEL: @xvadd_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadd.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvadd_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvadd_d(_1, _2); } +// CHECK-LABEL: @xvaddi_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvaddi.bu(<32 x i8> [[_1]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvaddi_bu(v32i8 _1) { return __builtin_lasx_xvaddi_bu(_1, 1); } +// CHECK-LABEL: @xvaddi_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddi.hu(<16 x i16> [[_1]], i32 1) +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvaddi_hu(v16i16 _1) { return __builtin_lasx_xvaddi_hu(_1, 1); } +// CHECK-LABEL: @xvaddi_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddi.wu(<8 x i32> [[_1]], i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvaddi_wu(v8i32 _1) { return __builtin_lasx_xvaddi_wu(_1, 1); } +// CHECK-LABEL: @xvaddi_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddi.du(<4 x i64> [[_1]], i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvaddi_du(v4i64 _1) { return __builtin_lasx_xvaddi_du(_1, 1); } +// CHECK-LABEL: @xvsub_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, 
!tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsub.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvsub_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsub_b(_1, _2); } +// CHECK-LABEL: @xvsub_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsub.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvsub_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsub_h(_1, _2); } +// CHECK-LABEL: @xvsub_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsub.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvsub_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsub_w(_1, _2); } +// CHECK-LABEL: @xvsub_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsub.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvsub_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsub_d(_1, _2); } +// CHECK-LABEL: @xvsubi_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsubi.bu(<32 x i8> [[_1]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvsubi_bu(v32i8 _1) { return __builtin_lasx_xvsubi_bu(_1, 1); } +// CHECK-LABEL: @xvsubi_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubi.hu(<16 x i16> [[_1]], i32 1) +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvsubi_hu(v16i16 _1) { return __builtin_lasx_xvsubi_hu(_1, 1); } +// CHECK-LABEL: @xvsubi_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubi.wu(<8 x i32> [[_1]], i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvsubi_wu(v8i32 _1) { return __builtin_lasx_xvsubi_wu(_1, 1); } +// CHECK-LABEL: @xvsubi_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubi.du(<4 x i64> [[_1]], i32 1) +// 
CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvsubi_du(v4i64 _1) { return __builtin_lasx_xvsubi_du(_1, 1); } +// CHECK-LABEL: @xvmax_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmax.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvmax_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmax_b(_1, _2); } +// CHECK-LABEL: @xvmax_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmax.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvmax_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmax_h(_1, _2); } +// CHECK-LABEL: @xvmax_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmax.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvmax_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmax_w(_1, _2); } +// CHECK-LABEL: @xvmax_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmax.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvmax_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmax_d(_1, _2); } +// CHECK-LABEL: @xvmaxi_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmaxi.b(<32 x i8> [[_1]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvmaxi_b(v32i8 _1) { return __builtin_lasx_xvmaxi_b(_1, 1); } +// CHECK-LABEL: @xvmaxi_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaxi.h(<16 x i16> [[_1]], i32 1) +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvmaxi_h(v16i16 _1) { return __builtin_lasx_xvmaxi_h(_1, 1); } +// CHECK-LABEL: @xvmaxi_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaxi.w(<8 x i32> [[_1]], i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr 
[[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvmaxi_w(v8i32 _1) { return __builtin_lasx_xvmaxi_w(_1, 1); } +// CHECK-LABEL: @xvmaxi_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaxi.d(<4 x i64> [[_1]], i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvmaxi_d(v4i64 _1) { return __builtin_lasx_xvmaxi_d(_1, 1); } +// CHECK-LABEL: @xvmax_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmax.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32u8 xvmax_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvmax_bu(_1, _2); } +// CHECK-LABEL: @xvmax_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmax.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16u16 xvmax_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvmax_hu(_1, _2); } +// CHECK-LABEL: @xvmax_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmax.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8u32 xvmax_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvmax_wu(_1, _2); } +// CHECK-LABEL: @xvmax_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmax.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4u64 xvmax_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvmax_du(_1, _2); } +// CHECK-LABEL: @xvmaxi_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmaxi.bu(<32 x i8> [[_1]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32u8 xvmaxi_bu(v32u8 _1) { return __builtin_lasx_xvmaxi_bu(_1, 1); } +// CHECK-LABEL: @xvmaxi_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaxi.hu(<16 x i16> [[_1]], i32 1) +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa 
[[TBAA2]] +// CHECK-NEXT: ret void +// +v16u16 xvmaxi_hu(v16u16 _1) { return __builtin_lasx_xvmaxi_hu(_1, 1); } +// CHECK-LABEL: @xvmaxi_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaxi.wu(<8 x i32> [[_1]], i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8u32 xvmaxi_wu(v8u32 _1) { return __builtin_lasx_xvmaxi_wu(_1, 1); } +// CHECK-LABEL: @xvmaxi_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaxi.du(<4 x i64> [[_1]], i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4u64 xvmaxi_du(v4u64 _1) { return __builtin_lasx_xvmaxi_du(_1, 1); } +// CHECK-LABEL: @xvmin_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmin.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvmin_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmin_b(_1, _2); } +// CHECK-LABEL: @xvmin_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmin.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvmin_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmin_h(_1, _2); } +// CHECK-LABEL: @xvmin_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmin.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvmin_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmin_w(_1, _2); } +// CHECK-LABEL: @xvmin_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmin.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvmin_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmin_d(_1, _2); } +// CHECK-LABEL: @xvmini_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmini.b(<32 x i8> [[_1]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 
xvmini_b(v32i8 _1) { return __builtin_lasx_xvmini_b(_1, 1); } +// CHECK-LABEL: @xvmini_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmini.h(<16 x i16> [[_1]], i32 1) +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvmini_h(v16i16 _1) { return __builtin_lasx_xvmini_h(_1, 1); } +// CHECK-LABEL: @xvmini_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmini.w(<8 x i32> [[_1]], i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvmini_w(v8i32 _1) { return __builtin_lasx_xvmini_w(_1, 1); } +// CHECK-LABEL: @xvmini_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmini.d(<4 x i64> [[_1]], i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvmini_d(v4i64 _1) { return __builtin_lasx_xvmini_d(_1, 1); } +// CHECK-LABEL: @xvmin_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmin.bu(<32 x i8> [[_1]], <32 x i8> [[_2]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32u8 xvmin_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvmin_bu(_1, _2); } +// CHECK-LABEL: @xvmin_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmin.hu(<16 x i16> [[_1]], <16 x i16> [[_2]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16u16 xvmin_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvmin_hu(_1, _2); } +// CHECK-LABEL: @xvmin_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmin.wu(<8 x i32> [[_1]], <8 x i32> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8u32 xvmin_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvmin_wu(_1, _2); } +// CHECK-LABEL: @xvmin_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmin.du(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4u64 xvmin_du(v4u64 _1, v4u64 _2) { return 
__builtin_lasx_xvmin_du(_1, _2); } +// CHECK-LABEL: @xvmini_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmini.bu(<32 x i8> [[_1]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32u8 xvmini_bu(v32u8 _1) { return __builtin_lasx_xvmini_bu(_1, 1); } +// CHECK-LABEL: @xvmini_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmini.hu(<16 x i16> [[_1]], i32 1) +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16u16 xvmini_hu(v16u16 _1) { return __builtin_lasx_xvmini_hu(_1, 1); } +// CHECK-LABEL: @xvmini_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmini.wu(<8 x i32> [[_1]], i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8u32 xvmini_wu(v8u32 _1) { return __builtin_lasx_xvmini_wu(_1, 1); } +// CHECK-LABEL: @xvmini_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmini.du(<4 x i64> [[_1]], i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4u64 xvmini_du(v4u64 _1) { return __builtin_lasx_xvmini_du(_1, 1); } +// CHECK-LABEL: @xvseq_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvseq.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvseq_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvseq_b(_1, _2); } +// CHECK-LABEL: @xvseq_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvseq.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvseq_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvseq_h(_1, _2); } +// CHECK-LABEL: @xvseq_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvseq.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvseq_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvseq_w(_1, _2); } +// CHECK-LABEL: @xvseq_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], 
align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvseq.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvseq_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvseq_d(_1, _2); }
+// CHECK-LABEL: @xvseqi_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvseqi.b(<32 x i8> [[_1]], i32 1)
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvseqi_b(v32i8 _1) { return __builtin_lasx_xvseqi_b(_1, 1); }
+// CHECK-LABEL: @xvseqi_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvseqi.h(<16 x i16> [[_1]], i32 1)
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvseqi_h(v16i16 _1) { return __builtin_lasx_xvseqi_h(_1, 1); }
+// CHECK-LABEL: @xvseqi_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvseqi.w(<8 x i32> [[_1]], i32 1)
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvseqi_w(v8i32 _1) { return __builtin_lasx_xvseqi_w(_1, 1); }
+// CHECK-LABEL: @xvseqi_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvseqi.d(<4 x i64> [[_1]], i32 1)
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvseqi_d(v4i64 _1) { return __builtin_lasx_xvseqi_d(_1, 1); }
+// CHECK-LABEL: @xvslt_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslt.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvslt_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvslt_b(_1, _2); }
+// CHECK-LABEL: @xvslt_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslt.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvslt_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvslt_h(_1, _2); }
+// CHECK-LABEL: @xvslt_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslt.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvslt_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvslt_w(_1, _2); }
+// CHECK-LABEL: @xvslt_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslt.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvslt_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvslt_d(_1, _2); }
+// CHECK-LABEL: @xvslti_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslti.b(<32 x i8> [[_1]], i32 1)
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvslti_b(v32i8 _1) { return __builtin_lasx_xvslti_b(_1, 1); }
+// CHECK-LABEL: @xvslti_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslti.h(<16 x i16> [[_1]], i32 1)
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvslti_h(v16i16 _1) { return __builtin_lasx_xvslti_h(_1, 1); }
+// CHECK-LABEL: @xvslti_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslti.w(<8 x i32> [[_1]], i32 1)
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvslti_w(v8i32 _1) { return __builtin_lasx_xvslti_w(_1, 1); }
+// CHECK-LABEL: @xvslti_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslti.d(<4 x i64> [[_1]], i32 1)
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvslti_d(v4i64 _1) { return __builtin_lasx_xvslti_d(_1, 1); }
+// CHECK-LABEL: @xvslt_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslt.bu(<32 x i8> [[_1]], <32 x i8> [[_2]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvslt_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvslt_bu(_1, _2); }
+// CHECK-LABEL: @xvslt_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslt.hu(<16 x i16> [[_1]], <16 x i16> [[_2]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvslt_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvslt_hu(_1, _2); }
+// CHECK-LABEL: @xvslt_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslt.wu(<8 x i32> [[_1]], <8 x i32> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvslt_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvslt_wu(_1, _2); }
+// CHECK-LABEL: @xvslt_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslt.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvslt_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvslt_du(_1, _2); }
+// CHECK-LABEL: @xvslti_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslti.bu(<32 x i8> [[_1]], i32 1)
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvslti_bu(v32u8 _1) { return __builtin_lasx_xvslti_bu(_1, 1); }
+// CHECK-LABEL: @xvslti_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslti.hu(<16 x i16> [[_1]], i32 1)
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvslti_hu(v16u16 _1) { return __builtin_lasx_xvslti_hu(_1, 1); }
+// CHECK-LABEL: @xvslti_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslti.wu(<8 x i32> [[_1]], i32 1)
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvslti_wu(v8u32 _1) { return __builtin_lasx_xvslti_wu(_1, 1); }
+// CHECK-LABEL: @xvslti_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslti.du(<4 x i64> [[_1]], i32 1)
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvslti_du(v4u64 _1) { return __builtin_lasx_xvslti_du(_1, 1); }
+// CHECK-LABEL: @xvsle_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsle.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvsle_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsle_b(_1, _2); }
+// CHECK-LABEL: @xvsle_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsle.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvsle_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsle_h(_1, _2); }
+// CHECK-LABEL: @xvsle_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsle.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvsle_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsle_w(_1, _2); }
+// CHECK-LABEL: @xvsle_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsle.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvsle_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsle_d(_1, _2); }
+// CHECK-LABEL: @xvslei_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslei.b(<32 x i8> [[_1]], i32 1)
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvslei_b(v32i8 _1) { return __builtin_lasx_xvslei_b(_1, 1); }
+// CHECK-LABEL: @xvslei_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslei.h(<16 x i16> [[_1]], i32 1)
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvslei_h(v16i16 _1) { return __builtin_lasx_xvslei_h(_1, 1); }
+// CHECK-LABEL: @xvslei_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslei.w(<8 x i32> [[_1]], i32 1)
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvslei_w(v8i32 _1) { return __builtin_lasx_xvslei_w(_1, 1); }
+// CHECK-LABEL: @xvslei_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslei.d(<4 x i64> [[_1]], i32 1)
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvslei_d(v4i64 _1) { return __builtin_lasx_xvslei_d(_1, 1); }
+// CHECK-LABEL: @xvsle_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsle.bu(<32 x i8> [[_1]], <32 x i8> [[_2]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvsle_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvsle_bu(_1, _2); }
+// CHECK-LABEL: @xvsle_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsle.hu(<16 x i16> [[_1]], <16 x i16> [[_2]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvsle_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvsle_hu(_1, _2); }
+// CHECK-LABEL: @xvsle_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsle.wu(<8 x i32> [[_1]], <8 x i32> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvsle_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvsle_wu(_1, _2); }
+// CHECK-LABEL: @xvsle_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsle.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvsle_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvsle_du(_1, _2); }
+// CHECK-LABEL: @xvslei_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslei.bu(<32 x i8> [[_1]], i32 1)
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvslei_bu(v32u8 _1) { return __builtin_lasx_xvslei_bu(_1, 1); }
+// CHECK-LABEL: @xvslei_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslei.hu(<16 x i16> [[_1]], i32 1)
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvslei_hu(v16u16 _1) { return __builtin_lasx_xvslei_hu(_1, 1); }
+// CHECK-LABEL: @xvslei_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslei.wu(<8 x i32> [[_1]], i32 1)
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvslei_wu(v8u32 _1) { return __builtin_lasx_xvslei_wu(_1, 1); }
+// CHECK-LABEL: @xvslei_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslei.du(<4 x i64> [[_1]],
i32 1)
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvslei_du(v4u64 _1) { return __builtin_lasx_xvslei_du(_1, 1); }
+// CHECK-LABEL: @xvsat_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsat.b(<32 x i8> [[_1]], i32 1)
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvsat_b(v32i8 _1) { return __builtin_lasx_xvsat_b(_1, 1); }
+// CHECK-LABEL: @xvsat_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsat.h(<16 x i16> [[_1]], i32 1)
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvsat_h(v16i16 _1) { return __builtin_lasx_xvsat_h(_1, 1); }
+// CHECK-LABEL: @xvsat_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsat.w(<8 x i32> [[_1]], i32 1)
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvsat_w(v8i32 _1) { return __builtin_lasx_xvsat_w(_1, 1); }
+// CHECK-LABEL: @xvsat_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsat.d(<4 x i64> [[_1]], i32 1)
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvsat_d(v4i64 _1) { return __builtin_lasx_xvsat_d(_1, 1); }
+// CHECK-LABEL: @xvsat_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsat.bu(<32 x i8> [[_1]], i32 1)
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32u8 xvsat_bu(v32u8 _1) { return __builtin_lasx_xvsat_bu(_1, 1); }
+// CHECK-LABEL: @xvsat_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsat.hu(<16 x i16> [[_1]], i32 1)
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16u16 xvsat_hu(v16u16 _1) { return __builtin_lasx_xvsat_hu(_1, 1); }
+// CHECK-LABEL: @xvsat_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsat.wu(<8 x i32> [[_1]], i32 1)
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8u32 xvsat_wu(v8u32 _1) { return __builtin_lasx_xvsat_wu(_1, 1); }
+// CHECK-LABEL: @xvsat_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsat.du(<4 x i64> [[_1]], i32 1)
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4u64 xvsat_du(v4u64 _1) { return __builtin_lasx_xvsat_du(_1, 1); }
+// CHECK-LABEL: @xvadda_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvadda.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvadda_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvadda_b(_1, _2); }
+// CHECK-LABEL: @xvadda_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvadda.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvadda_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvadda_h(_1, _2); }
+// CHECK-LABEL: @xvadda_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvadda.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvadda_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvadda_w(_1, _2); }
+// CHECK-LABEL: @xvadda_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadda.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvadda_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvadda_d(_1, _2); }
+// CHECK-LABEL: @xvsadd_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsadd.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvsadd_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsadd_b(_1, _2); }
+// CHECK-LABEL: @xvsadd_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsadd.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvsadd_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsadd_h(_1, _2); }
+// CHECK-LABEL: @xvsadd_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsadd.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvsadd_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsadd_w(_1, _2); }
+// CHECK-LABEL: @xvsadd_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsadd.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvsadd_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsadd_d(_1, _2); }
+// CHECK-LABEL: @xvsadd_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsadd.bu(<32 x i8> [[_1]], <32 x i8> [[_2]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32u8 xvsadd_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvsadd_bu(_1, _2); }
+// CHECK-LABEL: @xvsadd_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsadd.hu(<16 x i16> [[_1]], <16 x i16> [[_2]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16u16 xvsadd_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvsadd_hu(_1, _2); }
+// CHECK-LABEL: @xvsadd_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsadd.wu(<8 x i32> [[_1]], <8 x i32> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8u32 xvsadd_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvsadd_wu(_1, _2); }
+// CHECK-LABEL: @xvsadd_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsadd.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4u64 xvsadd_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvsadd_du(_1, _2); }
+// CHECK-LABEL: @xvavg_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavg.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvavg_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvavg_b(_1, _2); }
+// CHECK-LABEL: @xvavg_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavg.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvavg_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvavg_h(_1, _2); }
+// CHECK-LABEL: @xvavg_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavg.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvavg_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvavg_w(_1, _2); }
+// CHECK-LABEL: @xvavg_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavg.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvavg_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvavg_d(_1, _2); }
+// CHECK-LABEL: @xvavg_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavg.bu(<32 x i8> [[_1]], <32 x i8> [[_2]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32u8 xvavg_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvavg_bu(_1, _2); }
+// CHECK-LABEL: @xvavg_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavg.hu(<16 x i16> [[_1]], <16 x i16> [[_2]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16u16 xvavg_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvavg_hu(_1, _2); }
+// CHECK-LABEL: @xvavg_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavg.wu(<8 x i32> [[_1]], <8 x i32> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8u32 xvavg_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvavg_wu(_1, _2); }
+// CHECK-LABEL: @xvavg_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavg.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4u64 xvavg_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvavg_du(_1, _2); }
+// CHECK-LABEL: @xvavgr_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavgr.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvavgr_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvavgr_b(_1, _2); }
+// CHECK-LABEL: @xvavgr_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavgr.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvavgr_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvavgr_h(_1, _2); }
+// CHECK-LABEL: @xvavgr_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavgr.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvavgr_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvavgr_w(_1, _2); }
+// CHECK-LABEL: @xvavgr_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavgr.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvavgr_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvavgr_d(_1, _2); }
+// CHECK-LABEL: @xvavgr_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvavgr.bu(<32 x i8> [[_1]], <32 x i8> [[_2]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32u8 xvavgr_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvavgr_bu(_1, _2); }
+// CHECK-LABEL: @xvavgr_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvavgr.hu(<16 x i16>
[[_1]], <16 x i16> [[_2]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16u16 xvavgr_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvavgr_hu(_1, _2); }
+// CHECK-LABEL: @xvavgr_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvavgr.wu(<8 x i32> [[_1]], <8 x i32> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8u32 xvavgr_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvavgr_wu(_1, _2); }
+// CHECK-LABEL: @xvavgr_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvavgr.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4u64 xvavgr_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvavgr_du(_1, _2); }
+// CHECK-LABEL: @xvssub_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssub.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvssub_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvssub_b(_1, _2); }
+// CHECK-LABEL: @xvssub_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssub.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvssub_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssub_h(_1, _2); }
+// CHECK-LABEL: @xvssub_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssub.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvssub_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssub_w(_1, _2); }
+// CHECK-LABEL: @xvssub_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssub.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvssub_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssub_d(_1, _2); }
+// CHECK-LABEL: @xvssub_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssub.bu(<32 x i8> [[_1]], <32 x i8> [[_2]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32u8 xvssub_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvssub_bu(_1, _2); }
+// CHECK-LABEL: @xvssub_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssub.hu(<16 x i16> [[_1]], <16 x i16> [[_2]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16u16 xvssub_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvssub_hu(_1, _2); }
+// CHECK-LABEL: @xvssub_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssub.wu(<8 x i32> [[_1]], <8 x i32> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8u32 xvssub_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvssub_wu(_1, _2); }
+// CHECK-LABEL: @xvssub_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssub.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4u64 xvssub_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvssub_du(_1, _2); }
+// CHECK-LABEL: @xvabsd_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvabsd.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvabsd_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvabsd_b(_1, _2); }
+// CHECK-LABEL: @xvabsd_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvabsd.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvabsd_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvabsd_h(_1, _2); }
+// CHECK-LABEL: @xvabsd_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvabsd.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvabsd_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvabsd_w(_1, _2); }
+// CHECK-LABEL: @xvabsd_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvabsd.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvabsd_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvabsd_d(_1, _2); }
+// CHECK-LABEL: @xvabsd_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvabsd.bu(<32 x i8> [[_1]], <32 x i8> [[_2]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32u8 xvabsd_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvabsd_bu(_1, _2); }
+// CHECK-LABEL: @xvabsd_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvabsd.hu(<16 x i16> [[_1]], <16 x i16> [[_2]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16u16 xvabsd_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvabsd_hu(_1, _2); }
+// CHECK-LABEL: @xvabsd_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvabsd.wu(<8 x i32> [[_1]], <8 x i32> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8u32 xvabsd_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvabsd_wu(_1, _2); }
+// CHECK-LABEL: @xvabsd_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvabsd.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4u64 xvabsd_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvabsd_du(_1, _2); }
+// CHECK-LABEL: @xvmul_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmul.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvmul_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmul_b(_1, _2); }
+// CHECK-LABEL: @xvmul_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmul.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvmul_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmul_h(_1, _2); }
+// CHECK-LABEL: @xvmul_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmul.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvmul_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmul_w(_1, _2); }
+// CHECK-LABEL: @xvmul_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmul.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvmul_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmul_d(_1, _2); }
+// CHECK-LABEL: @xvmadd_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmadd.b(<32 x i8> [[_1]], <32 x i8> [[_2]], <32 x i8> [[_3]])
+// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvmadd_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __builtin_lasx_xvmadd_b(_1, _2, _3); }
+// CHECK-LABEL: @xvmadd_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmadd.h(<16 x i16> [[_1]], <16 x i16> [[_2]], <16 x i16> [[_3]])
+// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvmadd_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __builtin_lasx_xvmadd_h(_1, _2, _3); }
+// CHECK-LABEL: @xvmadd_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmadd.w(<8 x i32> [[_1]], <8 x i32> [[_2]], <8 x i32> [[_3]])
+// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvmadd_w(v8i32 _1, v8i32 _2, v8i32 _3) { return __builtin_lasx_xvmadd_w(_1, _2, _3); }
+// CHECK-LABEL: @xvmadd_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmadd.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]])
+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvmadd_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __builtin_lasx_xvmadd_d(_1, _2, _3); }
+// CHECK-LABEL: @xvmsub_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmsub.b(<32 x i8> [[_1]], <32 x i8> [[_2]], <32 x i8> [[_3]])
+// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvmsub_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __builtin_lasx_xvmsub_b(_1, _2, _3); }
+// CHECK-LABEL: @xvmsub_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmsub.h(<16 x i16> [[_1]], <16 x i16> [[_2]], <16 x i16> [[_3]])
+// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvmsub_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __builtin_lasx_xvmsub_h(_1, _2, _3); }
+// CHECK-LABEL: @xvmsub_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmsub.w(<8 x i32> [[_1]], <8 x i32> [[_2]], <8 x i32> [[_3]])
+// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvmsub_w(v8i32 _1, v8i32 _2, v8i32 _3) { return __builtin_lasx_xvmsub_w(_1, _2, _3); }
+// CHECK-LABEL: @xvmsub_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmsub.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]])
+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvmsub_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __builtin_lasx_xvmsub_d(_1, _2, _3); }
+// CHECK-LABEL: @xvdiv_b(
+//
CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvdiv.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvdiv_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvdiv_b(_1, _2); }
+// CHECK-LABEL: @xvdiv_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvdiv.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvdiv_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvdiv_h(_1, _2); }
+// CHECK-LABEL: @xvdiv_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvdiv.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvdiv_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvdiv_w(_1, _2); }
+// CHECK-LABEL: @xvdiv_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvdiv.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvdiv_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvdiv_d(_1, _2); }
+// CHECK-LABEL: @xvdiv_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvdiv.bu(<32 x i8> [[_1]], <32 x i8> [[_2]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32u8 xvdiv_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvdiv_bu(_1, _2); }
+// CHECK-LABEL: @xvdiv_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvdiv.hu(<16 x i16> [[_1]], <16 x i16> [[_2]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16u16 xvdiv_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvdiv_hu(_1, _2); }
+// CHECK-LABEL: @xvdiv_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvdiv.wu(<8 x i32> [[_1]], <8 x i32> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8u32 xvdiv_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvdiv_wu(_1, _2); }
+// CHECK-LABEL: @xvdiv_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvdiv.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4u64 xvdiv_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvdiv_du(_1, _2); }
+// CHECK-LABEL: @xvhaddw_h_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhaddw.h.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvhaddw_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvhaddw_h_b(_1, _2); }
+// CHECK-LABEL: @xvhaddw_w_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhaddw.w.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvhaddw_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvhaddw_w_h(_1, _2); }
+// CHECK-LABEL: @xvhaddw_d_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.d.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvhaddw_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvhaddw_d_w(_1, _2); }
+// CHECK-LABEL: @xvhaddw_hu_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhaddw.hu.bu(<32 x i8> [[_1]], <32 x i8> [[_2]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16u16 xvhaddw_hu_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvhaddw_hu_bu(_1, _2); }
+// CHECK-LABEL: @xvhaddw_wu_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhaddw.wu.hu(<16 x i16> [[_1]], <16 x i16> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8u32 xvhaddw_wu_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvhaddw_wu_hu(_1, _2); }
+// CHECK-LABEL: @xvhaddw_du_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.du.wu(<8 x i32> [[_1]], <8 x i32> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4u64 xvhaddw_du_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvhaddw_du_wu(_1, _2); }
+// CHECK-LABEL: @xvhsubw_h_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhsubw.h.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvhsubw_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvhsubw_h_b(_1, _2); }
+// CHECK-LABEL: @xvhsubw_w_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhsubw.w.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvhsubw_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvhsubw_w_h(_1, _2); }
+// CHECK-LABEL: @xvhsubw_d_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.d.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvhsubw_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvhsubw_d_w(_1, _2); }
+// CHECK-LABEL: @xvhsubw_hu_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvhsubw.hu.bu(<32 x i8> [[_1]], <32 x i8> [[_2]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvhsubw_hu_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvhsubw_hu_bu(_1, _2); }
+// CHECK-LABEL: @xvhsubw_wu_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvhsubw.wu.hu(<16 x i16> [[_1]], <16 x i16> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvhsubw_wu_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvhsubw_wu_hu(_1, _2); }
+// CHECK-LABEL: @xvhsubw_du_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.du.wu(<8 x i32> [[_1]], <8 x i32> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvhsubw_du_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvhsubw_du_wu(_1, _2); }
+// CHECK-LABEL: @xvmod_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmod.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvmod_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmod_b(_1, _2); }
+// CHECK-LABEL: @xvmod_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmod.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvmod_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmod_h(_1, _2); }
+// CHECK-LABEL: @xvmod_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmod.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvmod_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmod_w(_1, _2); }
+// CHECK-LABEL: @xvmod_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmod.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvmod_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmod_d(_1, _2); }
+// CHECK-LABEL: @xvmod_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmod.bu(<32 x i8> [[_1]], <32 x i8> [[_2]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32u8 xvmod_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvmod_bu(_1, _2); }
+// CHECK-LABEL: @xvmod_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmod.hu(<16 x i16> [[_1]], <16 x i16> [[_2]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16u16 xvmod_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvmod_hu(_1, _2); }
+// CHECK-LABEL: @xvmod_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmod.wu(<8 x i32> [[_1]], <8 x i32> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8u32 xvmod_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvmod_wu(_1, _2); }
+// CHECK-LABEL: @xvmod_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmod.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4u64 xvmod_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvmod_du(_1, _2); }
+// CHECK-LABEL: @xvrepl128vei_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrepl128vei.b(<32 x i8> [[_1]], i32 1)
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvrepl128vei_b(v32i8 _1) { return __builtin_lasx_xvrepl128vei_b(_1, 1); }
+// CHECK-LABEL: @xvrepl128vei_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrepl128vei.h(<16 x i16> [[_1]], i32 1)
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvrepl128vei_h(v16i16 _1) { return __builtin_lasx_xvrepl128vei_h(_1, 1); }
+// CHECK-LABEL: @xvrepl128vei_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrepl128vei.w(<8 x i32> [[_1]], i32 1)
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvrepl128vei_w(v8i32 _1) { return __builtin_lasx_xvrepl128vei_w(_1, 1); }
+// CHECK-LABEL: @xvrepl128vei_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrepl128vei.d(<4 x i64> [[_1]], i32 1)
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvrepl128vei_d(v4i64 _1) { return __builtin_lasx_xvrepl128vei_d(_1, 1); }
+// CHECK-LABEL: @xvpickev_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpickev.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
+// CHECK-NEXT: store <32 x
i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvpickev_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvpickev_b(_1, _2); } +// CHECK-LABEL: @xvpickev_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpickev.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvpickev_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvpickev_h(_1, _2); } +// CHECK-LABEL: @xvpickev_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickev.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvpickev_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvpickev_w(_1, _2); } +// CHECK-LABEL: @xvpickev_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickev.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvpickev_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvpickev_d(_1, _2); } +// CHECK-LABEL: @xvpickod_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpickod.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvpickod_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvpickod_b(_1, _2); } +// CHECK-LABEL: @xvpickod_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpickod.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvpickod_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvpickod_h(_1, _2); } +// CHECK-LABEL: @xvpickod_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickod.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvpickod_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvpickod_w(_1, _2); } +// CHECK-LABEL: @xvpickod_d( 
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickod.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvpickod_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvpickod_d(_1, _2); }
+// CHECK-LABEL: @xvilvh_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvilvh.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvilvh_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvilvh_b(_1, _2); }
+// CHECK-LABEL: @xvilvh_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvilvh.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvilvh_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvilvh_h(_1, _2); }
+// CHECK-LABEL: @xvilvh_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvilvh.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvilvh_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvilvh_w(_1, _2); }
+// CHECK-LABEL: @xvilvh_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvilvh.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvilvh_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvilvh_d(_1, _2); }
+// CHECK-LABEL: @xvilvl_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvilvl.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvilvl_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvilvl_b(_1, _2); }
+// CHECK-LABEL: @xvilvl_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvilvl.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvilvl_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvilvl_h(_1, _2); }
+// CHECK-LABEL: @xvilvl_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvilvl.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvilvl_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvilvl_w(_1, _2); }
+// CHECK-LABEL: @xvilvl_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvilvl.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvilvl_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvilvl_d(_1, _2); }
+// CHECK-LABEL: @xvpackev_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpackev.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvpackev_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvpackev_b(_1, _2); }
+// CHECK-LABEL: @xvpackev_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpackev.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvpackev_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvpackev_h(_1, _2); }
+// CHECK-LABEL: @xvpackev_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpackev.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvpackev_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvpackev_w(_1, _2); }
+// CHECK-LABEL: @xvpackev_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpackev.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvpackev_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvpackev_d(_1, _2); }
+// CHECK-LABEL: @xvpackod_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpackod.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvpackod_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvpackod_b(_1, _2); }
+// CHECK-LABEL: @xvpackod_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpackod.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvpackod_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvpackod_h(_1, _2); }
+// CHECK-LABEL: @xvpackod_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpackod.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvpackod_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvpackod_w(_1, _2); }
+// CHECK-LABEL: @xvpackod_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpackod.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvpackod_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvpackod_d(_1, _2); }
+// CHECK-LABEL: @xvshuf_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvshuf.b(<32 x i8> [[_1]], <32 x i8> [[_2]], <32 x i8> [[_3]])
+// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvshuf_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __builtin_lasx_xvshuf_b(_1, _2, _3); }
+// CHECK-LABEL: @xvshuf_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvshuf.h(<16 x i16> [[_1]], <16 x i16> [[_2]], <16 x i16> [[_3]])
+// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvshuf_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __builtin_lasx_xvshuf_h(_1, _2, _3); }
+// CHECK-LABEL: @xvshuf_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvshuf.w(<8 x i32> [[_1]], <8 x i32> [[_2]], <8 x i32> [[_3]])
+// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvshuf_w(v8i32 _1, v8i32 _2, v8i32 _3) { return __builtin_lasx_xvshuf_w(_1, _2, _3); }
+// CHECK-LABEL: @xvshuf_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvshuf.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]])
+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvshuf_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __builtin_lasx_xvshuf_d(_1, _2, _3); }
+// CHECK-LABEL: @xvand_v(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvand.v(<32 x i8> [[_1]], <32 x i8> [[_2]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32u8 xvand_v(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvand_v(_1, _2); }
+// CHECK-LABEL: @xvandi_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvandi.b(<32 x i8> [[_1]], i32 1)
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32u8 xvandi_b(v32u8 _1) { return __builtin_lasx_xvandi_b(_1, 1); }
+// CHECK-LABEL: @xvor_v(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvor.v(<32 x i8> [[_1]], <32 x i8> [[_2]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32u8 xvor_v(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvor_v(_1, _2); }
+// CHECK-LABEL: @xvori_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvori.b(<32 x i8> [[_1]], i32 1)
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32u8 xvori_b(v32u8 _1) { return __builtin_lasx_xvori_b(_1, 1); }
+// CHECK-LABEL: @xvnor_v(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvnor.v(<32 x i8> [[_1]], <32 x i8> [[_2]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32u8 xvnor_v(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvnor_v(_1, _2); }
+// CHECK-LABEL: @xvnori_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvnori.b(<32 x i8> [[_1]], i32 1)
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32u8 xvnori_b(v32u8 _1) { return __builtin_lasx_xvnori_b(_1, 1); }
+// CHECK-LABEL: @xvxor_v(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvxor.v(<32 x i8> [[_1]], <32 x i8> [[_2]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32u8 xvxor_v(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvxor_v(_1, _2); }
+// CHECK-LABEL: @xvxori_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvxori.b(<32 x i8> [[_1]], i32 1)
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32u8 xvxori_b(v32u8 _1) { return __builtin_lasx_xvxori_b(_1, 1); }
+// CHECK-LABEL: @xvbitsel_v(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitsel.v(<32 x i8> [[_1]], <32 x i8> [[_2]], <32 x i8> [[_3]])
+// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32u8 xvbitsel_v(v32u8 _1, v32u8 _2, v32u8 _3) { return __builtin_lasx_xvbitsel_v(_1, _2, _3); }
+// CHECK-LABEL: @xvbitseli_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbitseli.b(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1)
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32u8 xvbitseli_b(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvbitseli_b(_1, _2, 1); }
+// CHECK-LABEL: @xvshuf4i_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvshuf4i.b(<32 x i8> [[_1]], i32 1)
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvshuf4i_b(v32i8 _1) { return __builtin_lasx_xvshuf4i_b(_1, 1); }
+// CHECK-LABEL: @xvshuf4i_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvshuf4i.h(<16 x i16> [[_1]], i32 1)
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvshuf4i_h(v16i16 _1) { return __builtin_lasx_xvshuf4i_h(_1, 1); }
+// CHECK-LABEL: @xvshuf4i_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvshuf4i.w(<8 x i32> [[_1]], i32 1)
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvshuf4i_w(v8i32 _1) { return __builtin_lasx_xvshuf4i_w(_1, 1); }
+// CHECK-LABEL: @xvreplgr2vr_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplgr2vr.b(i32 [[_1:%.*]])
+// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvreplgr2vr_b(int _1) { return __builtin_lasx_xvreplgr2vr_b(_1); }
+// CHECK-LABEL: @xvreplgr2vr_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplgr2vr.h(i32 [[_1:%.*]])
+// CHECK-NEXT: store <16 x i16> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvreplgr2vr_h(int _1) { return __builtin_lasx_xvreplgr2vr_h(_1); }
+// CHECK-LABEL: @xvreplgr2vr_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplgr2vr.w(i32 [[_1:%.*]])
+// CHECK-NEXT: store <8 x i32> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvreplgr2vr_w(int _1) { return __builtin_lasx_xvreplgr2vr_w(_1); }
+// CHECK-LABEL: @xvreplgr2vr_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[CONV:%.*]] = sext i32 [[_1:%.*]] to i64
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplgr2vr.d(i64 [[CONV]])
+// CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvreplgr2vr_d(int _1) { return __builtin_lasx_xvreplgr2vr_d(_1); }
+// CHECK-LABEL: @xvpcnt_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpcnt.b(<32 x i8> [[_1]])
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvpcnt_b(v32i8 _1) { return __builtin_lasx_xvpcnt_b(_1); }
+// CHECK-LABEL: @xvpcnt_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvpcnt.h(<16 x i16> [[_1]])
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvpcnt_h(v16i16 _1) { return __builtin_lasx_xvpcnt_h(_1); }
+// CHECK-LABEL: @xvpcnt_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpcnt.w(<8 x i32> [[_1]])
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvpcnt_w(v8i32 _1) { return __builtin_lasx_xvpcnt_w(_1); }
+// CHECK-LABEL: @xvpcnt_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpcnt.d(<4 x i64> [[_1]])
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvpcnt_d(v4i64 _1) { return __builtin_lasx_xvpcnt_d(_1); }
+// CHECK-LABEL: @xvclo_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvclo.b(<32 x i8> [[_1]])
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvclo_b(v32i8 _1) { return __builtin_lasx_xvclo_b(_1); }
+// CHECK-LABEL: @xvclo_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvclo.h(<16 x i16> [[_1]])
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvclo_h(v16i16 _1) { return __builtin_lasx_xvclo_h(_1); }
+// CHECK-LABEL: @xvclo_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvclo.w(<8 x i32> [[_1]])
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvclo_w(v8i32 _1) { return __builtin_lasx_xvclo_w(_1); }
+// CHECK-LABEL: @xvclo_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvclo.d(<4 x i64> [[_1]])
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvclo_d(v4i64 _1) { return __builtin_lasx_xvclo_d(_1); }
+// CHECK-LABEL: @xvclz_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvclz.b(<32 x i8> [[_1]])
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvclz_b(v32i8 _1) { return __builtin_lasx_xvclz_b(_1); }
+// CHECK-LABEL: @xvclz_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvclz.h(<16 x i16> [[_1]])
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvclz_h(v16i16 _1) { return __builtin_lasx_xvclz_h(_1); }
+// CHECK-LABEL: @xvclz_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvclz.w(<8 x i32> [[_1]])
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvclz_w(v8i32 _1) { return __builtin_lasx_xvclz_w(_1); }
+// CHECK-LABEL: @xvclz_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvclz.d(<4 x i64> [[_1]])
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvclz_d(v4i64 _1) { return __builtin_lasx_xvclz_d(_1); }
+// CHECK-LABEL: @xvfadd_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfadd.s(<8 x float> [[_1]], <8 x float> [[_2]])
+// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8f32 xvfadd_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfadd_s(_1, _2); }
+// CHECK-LABEL: @xvfadd_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfadd.d(<4 x double> [[_1]], <4 x double> [[_2]])
+// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4f64 xvfadd_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfadd_d(_1, _2); }
+// CHECK-LABEL: @xvfsub_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfsub.s(<8 x float> [[_1]], <8 x float> [[_2]])
+// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8f32 xvfsub_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfsub_s(_1, _2); }
+// CHECK-LABEL: @xvfsub_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfsub.d(<4 x double> [[_1]], <4 x double> [[_2]])
+// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4f64 xvfsub_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfsub_d(_1, _2); }
+// CHECK-LABEL: @xvfmul_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmul.s(<8 x float> [[_1]], <8 x float> [[_2]])
+// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8f32 xvfmul_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfmul_s(_1, _2); }
+// CHECK-LABEL: @xvfmul_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmul.d(<4 x double> [[_1]], <4 x double> [[_2]])
+// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4f64 xvfmul_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfmul_d(_1, _2); }
+// CHECK-LABEL: @xvfdiv_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfdiv.s(<8 x float> [[_1]], <8 x float> [[_2]])
+// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8f32 xvfdiv_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfdiv_s(_1, _2); }
+// CHECK-LABEL: @xvfdiv_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfdiv.d(<4 x double> [[_1]], <4 x double> [[_2]])
+// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4f64 xvfdiv_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfdiv_d(_1, _2); }
+// CHECK-LABEL: @xvfcvt_h_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvfcvt.h.s(<8 x float> [[_1]], <8 x float> [[_2]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvfcvt_h_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcvt_h_s(_1, _2); }
+// CHECK-LABEL: @xvfcvt_s_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvt.s.d(<4 x double> [[_1]], <4 x double> [[_2]])
+// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8f32 xvfcvt_s_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcvt_s_d(_1, _2); }
+// CHECK-LABEL: @xvfmin_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmin.s(<8 x float> [[_1]], <8 x float> [[_2]])
+// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8f32 xvfmin_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfmin_s(_1, _2); }
+// CHECK-LABEL: @xvfmin_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmin.d(<4 x double> [[_1]], <4 x double> [[_2]])
+// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4f64 xvfmin_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfmin_d(_1, _2); }
+// CHECK-LABEL: @xvfmina_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmina.s(<8 x float> [[_1]], <8 x float> [[_2]])
+// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8f32 xvfmina_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfmina_s(_1, _2); }
+// CHECK-LABEL: @xvfmina_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmina.d(<4 x double> [[_1]], <4 x double> [[_2]])
+// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4f64 xvfmina_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfmina_d(_1, _2); }
+// CHECK-LABEL: @xvfmax_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmax.s(<8 x float> [[_1]], <8 x float> [[_2]])
+// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8f32 xvfmax_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfmax_s(_1, _2); }
+// CHECK-LABEL: @xvfmax_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmax.d(<4 x double> [[_1]], <4 x double> [[_2]])
+// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4f64 xvfmax_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfmax_d(_1, _2); }
+// CHECK-LABEL: @xvfmaxa_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmaxa.s(<8 x float> [[_1]], <8 x float> [[_2]])
+// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8f32 xvfmaxa_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfmaxa_s(_1, _2); }
+// CHECK-LABEL: @xvfmaxa_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmaxa.d(<4 x double> [[_1]], <4 x double> [[_2]])
+// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4f64 xvfmaxa_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfmaxa_d(_1, _2); }
+// CHECK-LABEL: @xvfclass_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfclass.s(<8 x float> [[_1]])
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvfclass_s(v8f32 _1) { return __builtin_lasx_xvfclass_s(_1); }
+// CHECK-LABEL: @xvfclass_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfclass.d(<4 x double> [[_1]])
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvfclass_d(v4f64 _1) { return __builtin_lasx_xvfclass_d(_1); }
+// CHECK-LABEL: @xvfsqrt_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfsqrt.s(<8 x float> [[_1]])
+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8f32 xvfsqrt_s(v8f32 _1) { return __builtin_lasx_xvfsqrt_s(_1); }
+// CHECK-LABEL: @xvfsqrt_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfsqrt.d(<4 x double> [[_1]])
+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4f64 xvfsqrt_d(v4f64 _1) { return __builtin_lasx_xvfsqrt_d(_1); }
+// CHECK-LABEL: @xvfrecip_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrecip.s(<8 x float> [[_1]])
+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8f32 xvfrecip_s(v8f32 _1) { return __builtin_lasx_xvfrecip_s(_1); }
+// CHECK-LABEL: @xvfrecip_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrecip.d(<4 x double> [[_1]])
+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4f64 xvfrecip_d(v4f64 _1) { return __builtin_lasx_xvfrecip_d(_1); }
+// CHECK-LABEL: @xvfrint_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrint.s(<8 x float> [[_1]])
+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8f32 xvfrint_s(v8f32 _1) { return __builtin_lasx_xvfrint_s(_1); }
+// CHECK-LABEL: @xvfrint_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrint.d(<4 x double> [[_1]])
+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4f64 xvfrint_d(v4f64 _1) { return __builtin_lasx_xvfrint_d(_1); }
+// CHECK-LABEL: @xvfrsqrt_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrsqrt.s(<8 x float> [[_1]])
+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8f32 xvfrsqrt_s(v8f32 _1) { return __builtin_lasx_xvfrsqrt_s(_1); }
+// CHECK-LABEL: @xvfrsqrt_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrsqrt.d(<4 x double> [[_1]])
+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4f64 xvfrsqrt_d(v4f64 _1) { return __builtin_lasx_xvfrsqrt_d(_1); }
+// CHECK-LABEL: @xvflogb_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvflogb.s(<8 x float> [[_1]])
+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8f32 xvflogb_s(v8f32 _1) { return __builtin_lasx_xvflogb_s(_1); }
+// CHECK-LABEL: @xvflogb_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvflogb.d(<4 x double> [[_1]])
+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4f64 xvflogb_d(v4f64 _1) { return __builtin_lasx_xvflogb_d(_1); }
+// CHECK-LABEL: @xvfcvth_s_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvth.s.h(<16 x i16> [[_1]])
+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8f32 xvfcvth_s_h(v16i16 _1) { return __builtin_lasx_xvfcvth_s_h(_1); }
+// CHECK-LABEL: @xvfcvth_d_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfcvth.d.s(<8 x float> [[_1]])
+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4f64 xvfcvth_d_s(v8f32 _1) { return __builtin_lasx_xvfcvth_d_s(_1); }
+// CHECK-LABEL: @xvfcvtl_s_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfcvtl.s.h(<16 x i16> [[_1]])
+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8f32 xvfcvtl_s_h(v16i16 _1) { return __builtin_lasx_xvfcvtl_s_h(_1); }
+// CHECK-LABEL: @xvfcvtl_d_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfcvtl.d.s(<8 x float> [[_1]])
+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4f64 xvfcvtl_d_s(v8f32 _1) { return __builtin_lasx_xvfcvtl_d_s(_1); }
+// CHECK-LABEL: @xvftint_w_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.w.s(<8 x float> [[_1]])
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvftint_w_s(v8f32 _1) { return __builtin_lasx_xvftint_w_s(_1); }
+// CHECK-LABEL: @xvftint_l_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftint.l.d(<4 x double> [[_1]])
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvftint_l_d(v4f64 _1) { return __builtin_lasx_xvftint_l_d(_1); }
+// CHECK-LABEL: @xvftint_wu_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.wu.s(<8 x float> [[_1]])
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8u32 xvftint_wu_s(v8f32 _1) { return __builtin_lasx_xvftint_wu_s(_1); }
+// CHECK-LABEL: @xvftint_lu_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftint.lu.d(<4 x double> [[_1]])
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4u64 xvftint_lu_d(v4f64 _1) { return __builtin_lasx_xvftint_lu_d(_1); }
+// CHECK-LABEL: @xvftintrz_w_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.s(<8 x float> [[_1]])
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvftintrz_w_s(v8f32 _1) { return __builtin_lasx_xvftintrz_w_s(_1); }
+// CHECK-LABEL: @xvftintrz_l_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrz.l.d(<4 x double> [[_1]])
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvftintrz_l_d(v4f64 _1) { return __builtin_lasx_xvftintrz_l_d(_1); }
+// CHECK-LABEL: @xvftintrz_wu_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.wu.s(<8 x float> [[_1]])
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8u32 xvftintrz_wu_s(v8f32 _1) { return __builtin_lasx_xvftintrz_wu_s(_1); }
+// CHECK-LABEL: @xvftintrz_lu_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrz.lu.d(<4 x double> [[_1]])
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4u64 xvftintrz_lu_d(v4f64 _1) { return __builtin_lasx_xvftintrz_lu_d(_1); }
+// CHECK-LABEL: @xvffint_s_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.w(<8 x i32> [[_1]])
+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8f32 xvffint_s_w(v8i32 _1) { return __builtin_lasx_xvffint_s_w(_1); }
+// CHECK-LABEL: @xvffint_d_l(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffint.d.l(<4 x i64> [[_1]])
+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4f64 xvffint_d_l(v4i64 _1) { return __builtin_lasx_xvffint_d_l(_1); }
+// CHECK-LABEL: @xvffint_s_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.wu(<8 x i32> [[_1]])
+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8f32 xvffint_s_wu(v8u32 _1) { return __builtin_lasx_xvffint_s_wu(_1); }
+// CHECK-LABEL: @xvffint_d_lu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffint.d.lu(<4 x i64> [[_1]])
+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4f64 xvffint_d_lu(v4u64 _1) { return __builtin_lasx_xvffint_d_lu(_1); }
+// CHECK-LABEL: @xvreplve_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve.b(<32 x i8> [[_1]], i32 [[_2:%.*]])
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvreplve_b(v32i8 _1, int _2) { return __builtin_lasx_xvreplve_b(_1, _2); }
+// CHECK-LABEL: @xvreplve_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplve.h(<16 x i16> [[_1]], i32 [[_2:%.*]])
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvreplve_h(v16i16 _1, int _2) { return __builtin_lasx_xvreplve_h(_1, _2); }
+// CHECK-LABEL: @xvreplve_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplve.w(<8 x i32> [[_1]], i32 [[_2:%.*]])
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvreplve_w(v8i32 _1, int _2) { return __builtin_lasx_xvreplve_w(_1, _2); }
+// CHECK-LABEL: @xvreplve_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplve.d(<4 x i64> [[_1]], i32 [[_2:%.*]])
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvreplve_d(v4i64 _1, int _2) { return __builtin_lasx_xvreplve_d(_1, _2); }
+// CHECK-LABEL: @xvpermi_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpermi.w(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1)
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvpermi_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvpermi_w(_1, _2, 1); }
+// CHECK-LABEL: @xvandn_v(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvandn.v(<32 x i8> [[_1]], <32 x i8> [[_2]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32u8 xvandn_v(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvandn_v(_1, _2); }
+// CHECK-LABEL: @xvneg_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvneg.b(<32 x i8> [[_1]])
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvneg_b(v32i8 _1) { return __builtin_lasx_xvneg_b(_1); }
+// CHECK-LABEL: @xvneg_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvneg.h(<16 x i16> [[_1]])
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvneg_h(v16i16 _1) { return __builtin_lasx_xvneg_h(_1); }
+// CHECK-LABEL: @xvneg_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvneg.w(<8 x i32> [[_1]])
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvneg_w(v8i32 _1) { return __builtin_lasx_xvneg_w(_1); }
+// CHECK-LABEL: @xvneg_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvneg.d(<4 x i64> [[_1]])
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvneg_d(v4i64 _1) { return __builtin_lasx_xvneg_d(_1); }
+// CHECK-LABEL: @xvmuh_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmuh.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvmuh_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmuh_b(_1, _2); }
+// CHECK-LABEL: @xvmuh_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmuh.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvmuh_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmuh_h(_1, _2); }
+// CHECK-LABEL: @xvmuh_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmuh.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvmuh_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmuh_w(_1, _2); }
+// CHECK-LABEL: @xvmuh_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmuh.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvmuh_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmuh_d(_1, _2); }
+// CHECK-LABEL: @xvmuh_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmuh.bu(<32 x i8> [[_1]], <32 x i8> [[_2]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32u8 xvmuh_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvmuh_bu(_1, _2); }
+// CHECK-LABEL: @xvmuh_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmuh.hu(<16 x i16> [[_1]], <16 x i16> [[_2]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16u16 xvmuh_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvmuh_hu(_1, _2); }
+// CHECK-LABEL: @xvmuh_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmuh.wu(<8 x i32> [[_1]], <8 x i32> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8u32 xvmuh_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvmuh_wu(_1, _2); }
+// CHECK-LABEL: @xvmuh_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmuh.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4u64 xvmuh_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvmuh_du(_1, _2); }
+// CHECK-LABEL: @xvsllwil_h_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] =
load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsllwil.h.b(<32 x i8> [[_1]], i32 1) +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvsllwil_h_b(v32i8 _1) { return __builtin_lasx_xvsllwil_h_b(_1, 1); } +// CHECK-LABEL: @xvsllwil_w_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsllwil.w.h(<16 x i16> [[_1]], i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvsllwil_w_h(v16i16 _1) { return __builtin_lasx_xvsllwil_w_h(_1, 1); } +// CHECK-LABEL: @xvsllwil_d_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsllwil.d.w(<8 x i32> [[_1]], i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvsllwil_d_w(v8i32 _1) { return __builtin_lasx_xvsllwil_d_w(_1, 1); } +// CHECK-LABEL: @xvsllwil_hu_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsllwil.hu.bu(<32 x i8> [[_1]], i32 1) +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16u16 xvsllwil_hu_bu(v32u8 _1) { return __builtin_lasx_xvsllwil_hu_bu(_1, 1); } +// CHECK-LABEL: @xvsllwil_wu_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsllwil.wu.hu(<16 x i16> [[_1]], i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8u32 xvsllwil_wu_hu(v16u16 _1) { return __builtin_lasx_xvsllwil_wu_hu(_1, 1); } +// CHECK-LABEL: @xvsllwil_du_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsllwil.du.wu(<8 x i32> [[_1]], i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4u64 xvsllwil_du_wu(v8u32 _1) { return __builtin_lasx_xvsllwil_du_wu(_1, 1); } +// CHECK-LABEL: @xvsran_b_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsran.b.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvsran_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsran_b_h(_1, _2); } +// CHECK-LABEL: @xvsran_h_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsran.h.w(<8 x i32> 
[[_1]], <8 x i32> [[_2]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvsran_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsran_h_w(_1, _2); } +// CHECK-LABEL: @xvsran_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsran.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvsran_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsran_w_d(_1, _2); } +// CHECK-LABEL: @xvssran_b_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssran.b.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvssran_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssran_b_h(_1, _2); } +// CHECK-LABEL: @xvssran_h_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssran.h.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvssran_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssran_h_w(_1, _2); } +// CHECK-LABEL: @xvssran_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssran.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvssran_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssran_w_d(_1, _2); } +// CHECK-LABEL: @xvssran_bu_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssran.bu.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32u8 xvssran_bu_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvssran_bu_h(_1, _2); } +// CHECK-LABEL: @xvssran_hu_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssran.hu.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16u16 xvssran_hu_w(v8u32 _1, 
v8u32 _2) { return __builtin_lasx_xvssran_hu_w(_1, _2); } +// CHECK-LABEL: @xvssran_wu_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssran.wu.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8u32 xvssran_wu_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvssran_wu_d(_1, _2); } +// CHECK-LABEL: @xvsrarn_b_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrarn.b.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvsrarn_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrarn_b_h(_1, _2); } +// CHECK-LABEL: @xvsrarn_h_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrarn.h.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvsrarn_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrarn_h_w(_1, _2); } +// CHECK-LABEL: @xvsrarn_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrarn.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvsrarn_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrarn_w_d(_1, _2); } +// CHECK-LABEL: @xvssrarn_b_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarn.b.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvssrarn_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrarn_b_h(_1, _2); } +// CHECK-LABEL: @xvssrarn_h_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarn.h.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvssrarn_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrarn_h_w(_1, _2); } +// CHECK-LABEL: @xvssrarn_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr 
[[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarn.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvssrarn_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrarn_w_d(_1, _2); } +// CHECK-LABEL: @xvssrarn_bu_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarn.bu.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32u8 xvssrarn_bu_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvssrarn_bu_h(_1, _2); } +// CHECK-LABEL: @xvssrarn_hu_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarn.hu.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16u16 xvssrarn_hu_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvssrarn_hu_w(_1, _2); } +// CHECK-LABEL: @xvssrarn_wu_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarn.wu.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8u32 xvssrarn_wu_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvssrarn_wu_d(_1, _2); } +// CHECK-LABEL: @xvsrln_b_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrln.b.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvsrln_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrln_b_h(_1, _2); } +// CHECK-LABEL: @xvsrln_h_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrln.h.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvsrln_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrln_h_w(_1, _2); } +// CHECK-LABEL: @xvsrln_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail 
call <8 x i32> @llvm.loongarch.lasx.xvsrln.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvsrln_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrln_w_d(_1, _2); } +// CHECK-LABEL: @xvssrln_bu_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrln.bu.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32u8 xvssrln_bu_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvssrln_bu_h(_1, _2); } +// CHECK-LABEL: @xvssrln_hu_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrln.hu.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16u16 xvssrln_hu_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvssrln_hu_w(_1, _2); } +// CHECK-LABEL: @xvssrln_wu_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrln.wu.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8u32 xvssrln_wu_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvssrln_wu_d(_1, _2); } +// CHECK-LABEL: @xvsrlrn_b_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlrn.b.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvsrlrn_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrlrn_b_h(_1, _2); } +// CHECK-LABEL: @xvsrlrn_h_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlrn.h.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvsrlrn_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrlrn_h_w(_1, _2); } +// CHECK-LABEL: @xvsrlrn_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlrn.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa 
[[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvsrlrn_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrlrn_w_d(_1, _2); } +// CHECK-LABEL: @xvssrlrn_bu_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.bu.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32u8 xvssrlrn_bu_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvssrlrn_bu_h(_1, _2); } +// CHECK-LABEL: @xvssrlrn_hu_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.hu.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16u16 xvssrlrn_hu_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvssrlrn_hu_w(_1, _2); } +// CHECK-LABEL: @xvssrlrn_wu_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.wu.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8u32 xvssrlrn_wu_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvssrlrn_wu_d(_1, _2); } +// CHECK-LABEL: @xvfrstpi_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvfrstpi.b(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvfrstpi_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvfrstpi_b(_1, _2, 1); } +// CHECK-LABEL: @xvfrstpi_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvfrstpi.h(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvfrstpi_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvfrstpi_h(_1, _2, 1); } +// CHECK-LABEL: @xvfrstp_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvfrstp.b(<32 x i8> [[_1]], <32 x i8> [[_2]], <32 x i8> [[_3]]) +// CHECK-NEXT: store <32 x i8> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// 
+v32i8 xvfrstp_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __builtin_lasx_xvfrstp_b(_1, _2, _3); } +// CHECK-LABEL: @xvfrstp_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvfrstp.h(<16 x i16> [[_1]], <16 x i16> [[_2]], <16 x i16> [[_3]]) +// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvfrstp_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __builtin_lasx_xvfrstp_h(_1, _2, _3); } +// CHECK-LABEL: @xvshuf4i_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvshuf4i.d(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvshuf4i_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvshuf4i_d(_1, _2, 1); } +// CHECK-LABEL: @xvbsrl_v( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbsrl.v(<32 x i8> [[_1]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvbsrl_v(v32i8 _1) { return __builtin_lasx_xvbsrl_v(_1, 1); } +// CHECK-LABEL: @xvbsll_v( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvbsll.v(<32 x i8> [[_1]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvbsll_v(v32i8 _1) { return __builtin_lasx_xvbsll_v(_1, 1); } +// CHECK-LABEL: @xvextrins_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvextrins.b(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvextrins_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvextrins_b(_1, _2, 1); } +// CHECK-LABEL: @xvextrins_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvextrins.h(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvextrins_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvextrins_h(_1, _2, 1); } +// CHECK-LABEL: @xvextrins_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, 
ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvextrins.w(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvextrins_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvextrins_w(_1, _2, 1); } +// CHECK-LABEL: @xvextrins_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextrins.d(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvextrins_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvextrins_d(_1, _2, 1); } +// CHECK-LABEL: @xvmskltz_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmskltz.b(<32 x i8> [[_1]]) +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvmskltz_b(v32i8 _1) { return __builtin_lasx_xvmskltz_b(_1); } +// CHECK-LABEL: @xvmskltz_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmskltz.h(<16 x i16> [[_1]]) +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvmskltz_h(v16i16 _1) { return __builtin_lasx_xvmskltz_h(_1); } +// CHECK-LABEL: @xvmskltz_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmskltz.w(<8 x i32> [[_1]]) +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvmskltz_w(v8i32 _1) { return __builtin_lasx_xvmskltz_w(_1); } +// CHECK-LABEL: @xvmskltz_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmskltz.d(<4 x i64> [[_1]]) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvmskltz_d(v4i64 _1) { return __builtin_lasx_xvmskltz_d(_1); } +// CHECK-LABEL: @xvsigncov_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsigncov.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvsigncov_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsigncov_b(_1, _2); } +// CHECK-LABEL: @xvsigncov_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> 
@llvm.loongarch.lasx.xvsigncov.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvsigncov_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsigncov_h(_1, _2); } +// CHECK-LABEL: @xvsigncov_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsigncov.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvsigncov_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsigncov_w(_1, _2); } +// CHECK-LABEL: @xvsigncov_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsigncov.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvsigncov_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsigncov_d(_1, _2); } +// CHECK-LABEL: @xvfmadd_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <8 x float>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmadd.s(<8 x float> [[_1]], <8 x float> [[_2]], <8 x float> [[_3]]) +// CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8f32 xvfmadd_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __builtin_lasx_xvfmadd_s(_1, _2, _3); } +// CHECK-LABEL: @xvfmadd_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmadd.d(<4 x double> [[_1]], <4 x double> [[_2]], <4 x double> [[_3]]) +// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4f64 xvfmadd_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __builtin_lasx_xvfmadd_d(_1, _2, _3); } +// CHECK-LABEL: @xvfmsub_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <8 x float>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfmsub.s(<8 x float> [[_1]], <8 x float> [[_2]], <8 x float> [[_3]]) +// CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8f32 xvfmsub_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __builtin_lasx_xvfmsub_s(_1, _2, _3); } +// CHECK-LABEL: @xvfmsub_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = 
load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfmsub.d(<4 x double> [[_1]], <4 x double> [[_2]], <4 x double> [[_3]]) +// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4f64 xvfmsub_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __builtin_lasx_xvfmsub_d(_1, _2, _3); } +// CHECK-LABEL: @xvfnmadd_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <8 x float>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfnmadd.s(<8 x float> [[_1]], <8 x float> [[_2]], <8 x float> [[_3]]) +// CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8f32 xvfnmadd_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __builtin_lasx_xvfnmadd_s(_1, _2, _3); } +// CHECK-LABEL: @xvfnmadd_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfnmadd.d(<4 x double> [[_1]], <4 x double> [[_2]], <4 x double> [[_3]]) +// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4f64 xvfnmadd_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __builtin_lasx_xvfnmadd_d(_1, _2, _3); } +// CHECK-LABEL: @xvfnmsub_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <8 x float>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfnmsub.s(<8 x float> [[_1]], <8 x float> [[_2]], <8 x float> [[_3]]) +// CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8f32 xvfnmsub_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __builtin_lasx_xvfnmsub_s(_1, _2, _3); } +// CHECK-LABEL: @xvfnmsub_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <4 x double>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfnmsub.d(<4 x double> [[_1]], <4 x double> [[_2]], <4 x double> [[_3]]) +// CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4f64 xvfnmsub_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __builtin_lasx_xvfnmsub_d(_1, _2, _3); } +// CHECK-LABEL: @xvftintrne_w_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// 
CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.s(<8 x float> [[_1]]) +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvftintrne_w_s(v8f32 _1) { return __builtin_lasx_xvftintrne_w_s(_1); } +// CHECK-LABEL: @xvftintrne_l_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrne.l.d(<4 x double> [[_1]]) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvftintrne_l_d(v4f64 _1) { return __builtin_lasx_xvftintrne_l_d(_1); } +// CHECK-LABEL: @xvftintrp_w_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.s(<8 x float> [[_1]]) +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvftintrp_w_s(v8f32 _1) { return __builtin_lasx_xvftintrp_w_s(_1); } +// CHECK-LABEL: @xvftintrp_l_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrp.l.d(<4 x double> [[_1]]) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvftintrp_l_d(v4f64 _1) { return __builtin_lasx_xvftintrp_l_d(_1); } +// CHECK-LABEL: @xvftintrm_w_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.s(<8 x float> [[_1]]) +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvftintrm_w_s(v8f32 _1) { return __builtin_lasx_xvftintrm_w_s(_1); } +// CHECK-LABEL: @xvftintrm_l_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrm.l.d(<4 x double> [[_1]]) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvftintrm_l_d(v4f64 _1) { return __builtin_lasx_xvftintrm_l_d(_1); } +// CHECK-LABEL: @xvftint_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.w.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvftint_w_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvftint_w_d(_1, _2); } +// CHECK-LABEL: @xvffint_s_l( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.l(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], 
align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8f32 xvffint_s_l(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvffint_s_l(_1, _2); } +// CHECK-LABEL: @xvftintrz_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvftintrz_w_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvftintrz_w_d(_1, _2); } +// CHECK-LABEL: @xvftintrp_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvftintrp_w_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvftintrp_w_d(_1, _2); } +// CHECK-LABEL: @xvftintrm_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvftintrm_w_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvftintrm_w_d(_1, _2); } +// CHECK-LABEL: @xvftintrne_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvftintrne_w_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvftintrne_w_d(_1, _2); } +// CHECK-LABEL: @xvftinth_l_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftinth.l.s(<8 x float> [[_1]]) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvftinth_l_s(v8f32 _1) { return __builtin_lasx_xvftinth_l_s(_1); } +// CHECK-LABEL: @xvftintl_l_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintl.l.s(<8 x float> [[_1]]) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvftintl_l_s(v8f32 _1) { return __builtin_lasx_xvftintl_l_s(_1); } +// CHECK-LABEL: @xvffinth_d_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> 
@llvm.loongarch.lasx.xvffinth.d.w(<8 x i32> [[_1]]) +// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4f64 xvffinth_d_w(v8i32 _1) { return __builtin_lasx_xvffinth_d_w(_1); } +// CHECK-LABEL: @xvffintl_d_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvffintl.d.w(<8 x i32> [[_1]]) +// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4f64 xvffintl_d_w(v8i32 _1) { return __builtin_lasx_xvffintl_d_w(_1); } +// CHECK-LABEL: @xvftintrzh_l_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrzh.l.s(<8 x float> [[_1]]) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvftintrzh_l_s(v8f32 _1) { return __builtin_lasx_xvftintrzh_l_s(_1); } +// CHECK-LABEL: @xvftintrzl_l_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrzl.l.s(<8 x float> [[_1]]) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvftintrzl_l_s(v8f32 _1) { return __builtin_lasx_xvftintrzl_l_s(_1); } +// CHECK-LABEL: @xvftintrph_l_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrph.l.s(<8 x float> [[_1]]) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvftintrph_l_s(v8f32 _1) { return __builtin_lasx_xvftintrph_l_s(_1); } +// CHECK-LABEL: @xvftintrpl_l_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrpl.l.s(<8 x float> [[_1]]) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvftintrpl_l_s(v8f32 _1) { return __builtin_lasx_xvftintrpl_l_s(_1); } +// CHECK-LABEL: @xvftintrmh_l_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrmh.l.s(<8 x float> [[_1]]) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvftintrmh_l_s(v8f32 _1) { return __builtin_lasx_xvftintrmh_l_s(_1); } +// CHECK-LABEL: @xvftintrml_l_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrml.l.s(<8 x float> [[_1]]) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvftintrml_l_s(v8f32 _1) { return __builtin_lasx_xvftintrml_l_s(_1); } +// CHECK-LABEL: @xvftintrneh_l_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, 
!tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrneh.l.s(<8 x float> [[_1]]) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvftintrneh_l_s(v8f32 _1) { return __builtin_lasx_xvftintrneh_l_s(_1); } +// CHECK-LABEL: @xvftintrnel_l_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrnel.l.s(<8 x float> [[_1]]) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvftintrnel_l_s(v8f32 _1) { return __builtin_lasx_xvftintrnel_l_s(_1); } +// CHECK-LABEL: @xvfrintrne_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrne.s(<8 x float> [[_1]]) +// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvfrintrne_s(v8f32 _1) { return __builtin_lasx_xvfrintrne_s(_1); } +// CHECK-LABEL: @xvfrintrne_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrne.d(<4 x double> [[_1]]) +// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvfrintrne_d(v4f64 _1) { return __builtin_lasx_xvfrintrne_d(_1); } +// CHECK-LABEL: @xvfrintrz_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrz.s(<8 x float> [[_1]]) +// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvfrintrz_s(v8f32 _1) { return __builtin_lasx_xvfrintrz_s(_1); } +// CHECK-LABEL: @xvfrintrz_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrz.d(<4 x double> [[_1]]) +// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvfrintrz_d(v4f64 _1) { return __builtin_lasx_xvfrintrz_d(_1); } +// CHECK-LABEL: @xvfrintrp_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrp.s(<8 x float> [[_1]]) +// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvfrintrp_s(v8f32 _1) { return __builtin_lasx_xvfrintrp_s(_1); } +// CHECK-LABEL: @xvfrintrp_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrp.d(<4 x double> [[_1]]) +// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvfrintrp_d(v4f64 _1) { return __builtin_lasx_xvfrintrp_d(_1); } +// CHECK-LABEL: @xvfrintrm_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: 
[[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrm.s(<8 x float> [[_1]])
+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvfrintrm_s(v8f32 _1) { return __builtin_lasx_xvfrintrm_s(_1); }
+// CHECK-LABEL: @xvfrintrm_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrm.d(<4 x double> [[_1]])
+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvfrintrm_d(v4f64 _1) { return __builtin_lasx_xvfrintrm_d(_1); }
+// CHECK-LABEL: @xvld(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvld(ptr [[_1:%.*]], i32 1)
+// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvld(void *_1) { return __builtin_lasx_xvld(_1, 1); }
+// CHECK-LABEL: @xvst(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvst(<32 x i8> [[_1]], ptr [[_2:%.*]], i32 1)
+// CHECK-NEXT: ret void
+//
+void xvst(v32i8 _1, void *_2) { return __builtin_lasx_xvst(_1, _2, 1); }
+// CHECK-LABEL: @xvstelm_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.b(<32 x i8> [[_1]], ptr [[_2:%.*]], i32 1, i32 1)
+// CHECK-NEXT: ret void
+//
+void xvstelm_b(v32i8 _1, void * _2) { return __builtin_lasx_xvstelm_b(_1, _2, 1, 1); }
+// CHECK-LABEL: @xvstelm_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.h(<16 x i16> [[_1]], ptr [[_2:%.*]], i32 2, i32 1)
+// CHECK-NEXT: ret void
+//
+void xvstelm_h(v16i16 _1, void * _2) { return __builtin_lasx_xvstelm_h(_1, _2, 2, 1); }
+// CHECK-LABEL: @xvstelm_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.w(<8 x i32> [[_1]], ptr [[_2:%.*]], i32 4, i32 1)
+// CHECK-NEXT: ret void
+//
+void xvstelm_w(v8i32 _1, void * _2) { return __builtin_lasx_xvstelm_w(_1, _2, 4, 1); }
+// CHECK-LABEL: @xvstelm_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.d(<4 x i64> [[_1]], ptr [[_2:%.*]], i32 8, i32 1)
+// CHECK-NEXT: ret void
+//
+void xvstelm_d(v4i64 _1, void * _2) { return __builtin_lasx_xvstelm_d(_1, _2, 8, 1); }
+// CHECK-LABEL: @xvinsve0_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvinsve0.w(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1)
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvinsve0_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvinsve0_w(_1, _2, 1); }
+// CHECK-LABEL: @xvinsve0_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvinsve0.d(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1)
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvinsve0_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvinsve0_d(_1, _2, 1); }
+// CHECK-LABEL: @xvpickve_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvpickve.w(<8 x i32> [[_1]], i32 1)
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvpickve_w(v8i32 _1) { return __builtin_lasx_xvpickve_w(_1, 1); }
+// CHECK-LABEL: @xvpickve_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpickve.d(<4 x i64> [[_1]], i32 1)
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvpickve_d(v4i64 _1) { return __builtin_lasx_xvpickve_d(_1, 1); }
+// CHECK-LABEL: @xvssrlrn_b_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.b.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvssrlrn_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrlrn_b_h(_1, _2); }
+// CHECK-LABEL: @xvssrlrn_h_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.h.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvssrlrn_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrlrn_h_w(_1, _2); }
+// CHECK-LABEL: @xvssrlrn_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvssrlrn_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrlrn_w_d(_1, _2); }
+// CHECK-LABEL: @xvssrln_b_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrln.b.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvssrln_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrln_b_h(_1, _2); }
+// CHECK-LABEL: @xvssrln_h_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrln.h.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvssrln_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrln_h_w(_1, _2); }
+// CHECK-LABEL: @xvssrln_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrln.w.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvssrln_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrln_w_d(_1, _2); }
+// CHECK-LABEL: @xvorn_v(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvorn.v(<32 x i8> [[_1]], <32 x i8> [[_2]])
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvorn_v(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvorn_v(_1, _2); }
+// CHECK-LABEL: @xvldi(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvldi(i32 1)
+// CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvldi() { return __builtin_lasx_xvldi(1); }
+// CHECK-LABEL: @xvldx(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvldx(ptr [[_1:%.*]], i64 1)
+// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvldx(void *_1) { return __builtin_lasx_xvldx(_1, 1); }
+// CHECK-LABEL: @xvstx(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstx(<32 x i8> [[_1]], ptr [[_2:%.*]], i64 1)
+// CHECK-NEXT: ret void
+//
+void xvstx(v32i8 _1, void *_2) { return __builtin_lasx_xvstx(_1, _2, 1); }
+// CHECK-LABEL: @xvextl_qu_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextl.qu.du(<4 x i64> [[_1]])
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4u64 xvextl_qu_du(v4u64 _1) { return __builtin_lasx_xvextl_qu_du(_1); }
+// CHECK-LABEL: @xvinsgr2vr_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvinsgr2vr.w(<8 x i32> [[_1]], i32 1, i32 1)
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvinsgr2vr_w(v8i32 _1) { return __builtin_lasx_xvinsgr2vr_w(_1, 1, 1); }
+// CHECK-LABEL: @xvinsgr2vr_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvinsgr2vr.d(<4 x i64> [[_1]], i64 1, i32 1)
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvinsgr2vr_d(v4i64 _1) { return __builtin_lasx_xvinsgr2vr_d(_1, 1, 1); }
+// CHECK-LABEL: @xvreplve0_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve0.b(<32 x i8> [[_1]])
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvreplve0_b(v32i8 _1) { return __builtin_lasx_xvreplve0_b(_1); }
+// CHECK-LABEL: @xvreplve0_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvreplve0.h(<16 x i16> [[_1]])
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvreplve0_h(v16i16 _1) { return __builtin_lasx_xvreplve0_h(_1); }
+// CHECK-LABEL: @xvreplve0_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvreplve0.w(<8 x i32> [[_1]])
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvreplve0_w(v8i32 _1) { return __builtin_lasx_xvreplve0_w(_1); }
+// CHECK-LABEL: @xvreplve0_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvreplve0.d(<4 x i64> [[_1]])
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvreplve0_d(v4i64 _1) { return __builtin_lasx_xvreplve0_d(_1); }
+// CHECK-LABEL: @xvreplve0_q(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve0.q(<32 x i8> [[_1]])
+// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvreplve0_q(v32i8 _1) { return __builtin_lasx_xvreplve0_q(_1); }
+// CHECK-LABEL: @vext2xv_h_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.vext2xv.h.b(<32 x i8> [[_1]])
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 vext2xv_h_b(v32i8 _1) { return __builtin_lasx_vext2xv_h_b(_1); }
+// CHECK-LABEL: @vext2xv_w_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.h(<16 x i16> [[_1]])
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 vext2xv_w_h(v16i16 _1) { return __builtin_lasx_vext2xv_w_h(_1); }
+// CHECK-LABEL: @vext2xv_d_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.w(<8 x i32> [[_1]])
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 vext2xv_d_w(v8i32 _1) { return __builtin_lasx_vext2xv_d_w(_1); }
+// CHECK-LABEL: @vext2xv_w_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.b(<32 x i8> [[_1]])
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 vext2xv_w_b(v32i8 _1) { return __builtin_lasx_vext2xv_w_b(_1); }
+// CHECK-LABEL: @vext2xv_d_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.h(<16 x i16> [[_1]])
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 vext2xv_d_h(v16i16 _1) { return __builtin_lasx_vext2xv_d_h(_1); }
+// CHECK-LABEL: @vext2xv_d_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.b(<32 x i8> [[_1]])
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 vext2xv_d_b(v32i8 _1) { return __builtin_lasx_vext2xv_d_b(_1); }
+// CHECK-LABEL: @vext2xv_hu_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.vext2xv.hu.bu(<32 x i8> [[_1]])
+// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 vext2xv_hu_bu(v32i8 _1) { return __builtin_lasx_vext2xv_hu_bu(_1); }
+// CHECK-LABEL: @vext2xv_wu_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.hu(<16 x i16> [[_1]])
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 vext2xv_wu_hu(v16i16 _1) { return __builtin_lasx_vext2xv_wu_hu(_1); }
+// CHECK-LABEL: @vext2xv_du_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.wu(<8 x i32> [[_1]])
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 vext2xv_du_wu(v8i32 _1) { return __builtin_lasx_vext2xv_du_wu(_1); }
+// CHECK-LABEL: @vext2xv_wu_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.bu(<32 x i8> [[_1]])
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 vext2xv_wu_bu(v32i8 _1) { return __builtin_lasx_vext2xv_wu_bu(_1); }
+// CHECK-LABEL: @vext2xv_du_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.hu(<16 x i16> [[_1]])
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 vext2xv_du_hu(v16i16 _1) { return __builtin_lasx_vext2xv_du_hu(_1); }
+// CHECK-LABEL: @vext2xv_du_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.bu(<32 x i8> [[_1]])
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 vext2xv_du_bu(v32i8 _1) { return __builtin_lasx_vext2xv_du_bu(_1); }
+// CHECK-LABEL: @xvpermi_q(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvpermi.q(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1)
+// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvpermi_q(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvpermi_q(_1, _2, 1); }
+// CHECK-LABEL: @xvpermi_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvpermi.d(<4 x i64> [[_1]], i32 1)
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvpermi_d(v4i64 _1) { return __builtin_lasx_xvpermi_d(_1, 1); }
+// CHECK-LABEL: @xvperm_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvperm.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvperm_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvperm_w(_1, _2); }
+// CHECK-LABEL: @xvldrepl_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvldrepl.b(ptr [[_1:%.*]], i32 1)
+// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v32i8 xvldrepl_b(void *_1) { return __builtin_lasx_xvldrepl_b(_1, 1); }
+// CHECK-LABEL: @xvldrepl_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvldrepl.h(ptr [[_1:%.*]], i32 2)
+// CHECK-NEXT: store <16 x i16> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvldrepl_h(void *_1) { return __builtin_lasx_xvldrepl_h(_1, 2); }
+// CHECK-LABEL: @xvldrepl_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvldrepl.w(ptr [[_1:%.*]], i32 4)
+// CHECK-NEXT: store <8 x i32> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvldrepl_w(void *_1) { return __builtin_lasx_xvldrepl_w(_1, 4); }
+// CHECK-LABEL: @xvldrepl_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvldrepl.d(ptr [[_1:%.*]], i32 8)
+// CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvldrepl_d(void *_1) { return __builtin_lasx_xvldrepl_d(_1, 8); }
+// CHECK-LABEL: @xvpickve2gr_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xvpickve2gr.w(<8 x i32> [[_1]], i32 1)
+// CHECK-NEXT: ret i32 [[TMP1]]
+//
+int xvpickve2gr_w(v8i32 _1) { return __builtin_lasx_xvpickve2gr_w(_1, 1); }
+// CHECK-LABEL: @xvpickve2gr_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xvpickve2gr.wu(<8 x i32> [[_1]], i32 1)
+// CHECK-NEXT: ret i32 [[TMP1]]
+//
+unsigned int xvpickve2gr_wu(v8i32 _1) { return __builtin_lasx_xvpickve2gr_wu(_1, 1); }
+// CHECK-LABEL: @xvpickve2gr_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.loongarch.lasx.xvpickve2gr.d(<4 x i64> [[_1]], i32 1)
+// CHECK-NEXT: ret i64 [[TMP1]]
+//
+long xvpickve2gr_d(v4i64 _1) { return __builtin_lasx_xvpickve2gr_d(_1, 1); }
+// CHECK-LABEL: @xvpickve2gr_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.loongarch.lasx.xvpickve2gr.du(<4 x i64> [[_1]], i32 1)
+// CHECK-NEXT: ret i64 [[TMP1]]
+//
+unsigned long int xvpickve2gr_du(v4i64 _1) { return __builtin_lasx_xvpickve2gr_du(_1, 1); }
+// CHECK-LABEL: @xvaddwev_q_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvaddwev_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvaddwev_q_d(_1, _2); }
+// CHECK-LABEL: @xvaddwev_d_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvaddwev_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvaddwev_d_w(_1, _2); }
+// CHECK-LABEL: @xvaddwev_w_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvaddwev_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvaddwev_w_h(_1, _2); }
+// CHECK-LABEL: @xvaddwev_h_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvaddwev_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvaddwev_h_b(_1, _2); }
+// CHECK-LABEL: @xvaddwev_q_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvaddwev_q_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvaddwev_q_du(_1, _2); }
+// CHECK-LABEL: @xvaddwev_d_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu(<8 x i32> [[_1]], <8 x i32> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvaddwev_d_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvaddwev_d_wu(_1, _2); }
+// CHECK-LABEL: @xvaddwev_w_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu(<16 x i16> [[_1]], <16 x i16> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvaddwev_w_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvaddwev_w_hu(_1, _2); }
+// CHECK-LABEL: @xvaddwev_h_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu(<32 x i8> [[_1]], <32 x i8> [[_2]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvaddwev_h_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvaddwev_h_bu(_1, _2); }
+// CHECK-LABEL: @xvsubwev_q_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvsubwev_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsubwev_q_d(_1, _2); }
+// CHECK-LABEL: @xvsubwev_d_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvsubwev_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsubwev_d_w(_1, _2); }
+// CHECK-LABEL: @xvsubwev_w_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvsubwev_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsubwev_w_h(_1, _2); }
+// CHECK-LABEL: @xvsubwev_h_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvsubwev_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsubwev_h_b(_1, _2); }
+// CHECK-LABEL: @xvsubwev_q_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvsubwev_q_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvsubwev_q_du(_1, _2); }
+// CHECK-LABEL: @xvsubwev_d_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.wu(<8 x i32> [[_1]], <8 x i32> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvsubwev_d_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvsubwev_d_wu(_1, _2); }
+// CHECK-LABEL: @xvsubwev_w_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.hu(<16 x i16> [[_1]], <16 x i16> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvsubwev_w_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvsubwev_w_hu(_1, _2); }
+// CHECK-LABEL: @xvsubwev_h_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.bu(<32 x i8> [[_1]], <32 x i8> [[_2]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvsubwev_h_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvsubwev_h_bu(_1, _2); }
+// CHECK-LABEL: @xvmulwev_q_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvmulwev_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmulwev_q_d(_1, _2); }
+// CHECK-LABEL: @xvmulwev_d_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvmulwev_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmulwev_d_w(_1, _2); }
+// CHECK-LABEL: @xvmulwev_w_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvmulwev_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmulwev_w_h(_1, _2); }
+// CHECK-LABEL: @xvmulwev_h_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvmulwev_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmulwev_h_b(_1, _2); }
+// CHECK-LABEL: @xvmulwev_q_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvmulwev_q_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvmulwev_q_du(_1, _2); }
+// CHECK-LABEL: @xvmulwev_d_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu(<8 x i32> [[_1]], <8 x i32> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvmulwev_d_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvmulwev_d_wu(_1, _2); }
+// CHECK-LABEL: @xvmulwev_w_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu(<16 x i16> [[_1]], <16 x i16> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvmulwev_w_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvmulwev_w_hu(_1, _2); }
+// CHECK-LABEL: @xvmulwev_h_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu(<32 x i8> [[_1]], <32 x i8> [[_2]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvmulwev_h_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvmulwev_h_bu(_1, _2); }
+// CHECK-LABEL: @xvaddwod_q_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvaddwod_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvaddwod_q_d(_1, _2); }
+// CHECK-LABEL: @xvaddwod_d_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvaddwod_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvaddwod_d_w(_1, _2); }
+// CHECK-LABEL: @xvaddwod_w_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvaddwod_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvaddwod_w_h(_1, _2); }
+// CHECK-LABEL: @xvaddwod_h_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvaddwod_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvaddwod_h_b(_1, _2); }
+// CHECK-LABEL: @xvaddwod_q_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvaddwod_q_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvaddwod_q_du(_1, _2); }
+// CHECK-LABEL: @xvaddwod_d_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu(<8 x i32> [[_1]], <8 x i32> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvaddwod_d_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvaddwod_d_wu(_1, _2); }
+// CHECK-LABEL: @xvaddwod_w_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu(<16 x i16> [[_1]], <16 x i16> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvaddwod_w_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvaddwod_w_hu(_1, _2); }
+// CHECK-LABEL: @xvaddwod_h_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu(<32 x i8> [[_1]], <32 x i8> [[_2]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvaddwod_h_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvaddwod_h_bu(_1, _2); }
+// CHECK-LABEL: @xvsubwod_q_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvsubwod_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsubwod_q_d(_1, _2); }
+// CHECK-LABEL: @xvsubwod_d_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvsubwod_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsubwod_d_w(_1, _2); }
+// CHECK-LABEL: @xvsubwod_w_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvsubwod_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsubwod_w_h(_1, _2); }
+// CHECK-LABEL: @xvsubwod_h_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvsubwod_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsubwod_h_b(_1, _2); }
+// CHECK-LABEL: @xvsubwod_q_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvsubwod_q_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvsubwod_q_du(_1, _2); }
+// CHECK-LABEL: @xvsubwod_d_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.wu(<8 x i32> [[_1]], <8 x i32> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvsubwod_d_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvsubwod_d_wu(_1, _2); }
+// CHECK-LABEL: @xvsubwod_w_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.hu(<16 x i16> [[_1]], <16 x i16> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvsubwod_w_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvsubwod_w_hu(_1, _2); }
+// CHECK-LABEL: @xvsubwod_h_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.bu(<32 x i8> [[_1]], <32 x i8> [[_2]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvsubwod_h_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvsubwod_h_bu(_1, _2); }
+// CHECK-LABEL: @xvmulwod_q_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvmulwod_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmulwod_q_d(_1, _2); }
+// CHECK-LABEL: @xvmulwod_d_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvmulwod_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmulwod_d_w(_1, _2); }
+// CHECK-LABEL: @xvmulwod_w_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvmulwod_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmulwod_w_h(_1, _2); }
+// CHECK-LABEL: @xvmulwod_h_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvmulwod_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmulwod_h_b(_1, _2); }
+// CHECK-LABEL: @xvmulwod_q_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvmulwod_q_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvmulwod_q_du(_1, _2); }
+// CHECK-LABEL: @xvmulwod_d_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu(<8 x i32> [[_1]], <8 x i32> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvmulwod_d_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvmulwod_d_wu(_1, _2); }
+// CHECK-LABEL: @xvmulwod_w_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu(<16 x i16> [[_1]], <16 x i16> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvmulwod_w_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvmulwod_w_hu(_1, _2); }
+// CHECK-LABEL: @xvmulwod_h_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu(<32 x i8> [[_1]], <32 x i8> [[_2]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvmulwod_h_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvmulwod_h_bu(_1, _2); }
+// CHECK-LABEL: @xvaddwev_d_wu_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvaddwev_d_wu_w(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvaddwev_d_wu_w(_1, _2); }
+// CHECK-LABEL: @xvaddwev_w_hu_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvaddwev_w_hu_h(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvaddwev_w_hu_h(_1, _2); }
+// CHECK-LABEL: @xvaddwev_h_bu_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvaddwev_h_bu_b(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvaddwev_h_bu_b(_1, _2); }
+// CHECK-LABEL: @xvmulwev_d_wu_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvmulwev_d_wu_w(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvmulwev_d_wu_w(_1, _2); }
+// CHECK-LABEL: @xvmulwev_w_hu_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvmulwev_w_hu_h(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvmulwev_w_hu_h(_1, _2); }
+// CHECK-LABEL: @xvmulwev_h_bu_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvmulwev_h_bu_b(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvmulwev_h_bu_b(_1, _2); }
+// CHECK-LABEL: @xvaddwod_d_wu_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvaddwod_d_wu_w(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvaddwod_d_wu_w(_1, _2); }
+// CHECK-LABEL: @xvaddwod_w_hu_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvaddwod_w_hu_h(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvaddwod_w_hu_h(_1, _2); }
+// CHECK-LABEL: @xvaddwod_h_bu_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvaddwod_h_bu_b(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvaddwod_h_bu_b(_1, _2); }
+// CHECK-LABEL: @xvmulwod_d_wu_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu.w(<8 x i32> [[_1]], <8 x i32> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvmulwod_d_wu_w(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvmulwod_d_wu_w(_1, _2); }
+// CHECK-LABEL: @xvmulwod_w_hu_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu.h(<16 x i16> [[_1]], <16 x i16> [[_2]])
+// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvmulwod_w_hu_h(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvmulwod_w_hu_h(_1, _2); }
+// CHECK-LABEL: @xvmulwod_h_bu_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu.b(<32 x i8> [[_1]], <32 x i8> [[_2]])
+// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvmulwod_h_bu_b(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvmulwod_h_bu_b(_1, _2); }
+// CHECK-LABEL: @xvhaddw_q_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvhaddw_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvhaddw_q_d(_1, _2); }
+// CHECK-LABEL: @xvhaddw_qu_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.qu.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4u64 xvhaddw_qu_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvhaddw_qu_du(_1, _2); }
+// CHECK-LABEL: @xvhsubw_q_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvhsubw_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvhsubw_q_d(_1, _2); }
+// CHECK-LABEL: @xvhsubw_qu_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.qu.du(<4 x i64> [[_1]], <4 x i64> [[_2]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4u64 xvhsubw_qu_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvhsubw_qu_du(_1, _2); }
+// CHECK-LABEL: @xvmaddwev_q_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]])
+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvmaddwev_q_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __builtin_lasx_xvmaddwev_q_d(_1, _2, _3); }
+// CHECK-LABEL: @xvmaddwev_d_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.w(<4 x i64> [[_1]], <8 x i32> [[_2]], <8 x i32> [[_3]])
+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvmaddwev_d_w(v4i64 _1, v8i32 _2, v8i32 _3) { return __builtin_lasx_xvmaddwev_d_w(_1, _2, _3); }
+// CHECK-LABEL: @xvmaddwev_w_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.h(<8 x i32> [[_1]], <16 x i16> [[_2]], <16 x i16> [[_3]])
+// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvmaddwev_w_h(v8i32 _1, v16i16 _2, v16i16 _3) { return __builtin_lasx_xvmaddwev_w_h(_1, _2, _3); }
+// CHECK-LABEL: @xvmaddwev_h_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.b(<16 x i16> [[_1]], <32 x i8> [[_2]], <32 x i8> [[_3]])
+// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvmaddwev_h_b(v16i16 _1, v32i8 _2, v32i8 _3) { return __builtin_lasx_xvmaddwev_h_b(_1, _2, _3); }
+// CHECK-LABEL: @xvmaddwev_q_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]])
+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4u64 xvmaddwev_q_du(v4u64 _1, v4u64 _2, v4u64 _3) { return __builtin_lasx_xvmaddwev_q_du(_1, _2, _3); }
+// CHECK-LABEL: @xvmaddwev_d_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu(<4 x i64> [[_1]], <8 x i32> [[_2]], <8 x i32> [[_3]])
+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4u64 xvmaddwev_d_wu(v4u64 _1, v8u32 _2, v8u32 _3) { return __builtin_lasx_xvmaddwev_d_wu(_1, _2, _3); }
+// CHECK-LABEL: @xvmaddwev_w_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu(<8 x i32> [[_1]], <16 x i16> [[_2]], <16 x i16> [[_3]])
+// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8u32 xvmaddwev_w_hu(v8u32 _1, v16u16 _2, v16u16 _3) { return __builtin_lasx_xvmaddwev_w_hu(_1, _2, _3); }
+// CHECK-LABEL: @xvmaddwev_h_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu(<16 x i16> [[_1]], <32 x i8> [[_2]], <32 x i8> [[_3]])
+// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16u16 xvmaddwev_h_bu(v16u16 _1, v32u8 _2, v32u8 _3) { return __builtin_lasx_xvmaddwev_h_bu(_1, _2, _3); }
+// CHECK-LABEL: @xvmaddwod_q_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]])
+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvmaddwod_q_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __builtin_lasx_xvmaddwod_q_d(_1, _2, _3); }
+// CHECK-LABEL: @xvmaddwod_d_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.w(<4 x i64> [[_1]], <8 x i32> [[_2]], <8 x i32> [[_3]])
+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvmaddwod_d_w(v4i64 _1, v8i32 _2, v8i32 _3) { return __builtin_lasx_xvmaddwod_d_w(_1, _2, _3); }
+// CHECK-LABEL: @xvmaddwod_w_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.h(<8 x i32> [[_1]], <16 x i16> [[_2]], <16 x i16> [[_3]])
+// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvmaddwod_w_h(v8i32 _1, v16i16 _2, v16i16 _3) { return __builtin_lasx_xvmaddwod_w_h(_1, _2, _3); }
+// CHECK-LABEL: @xvmaddwod_h_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.b(<16 x i16> [[_1]], <32 x i8> [[_2]], <32 x i8> [[_3]])
+// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvmaddwod_h_b(v16i16 _1, v32i8 _2, v32i8 _3) { return __builtin_lasx_xvmaddwod_h_b(_1, _2, _3); }
+// CHECK-LABEL: @xvmaddwod_q_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]])
+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4u64 xvmaddwod_q_du(v4u64 _1, v4u64 _2, v4u64 _3) { return __builtin_lasx_xvmaddwod_q_du(_1, _2, _3); }
+// CHECK-LABEL: @xvmaddwod_d_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu(<4 x i64> [[_1]], <8 x i32> [[_2]], <8 x i32> [[_3]])
+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4u64 xvmaddwod_d_wu(v4u64 _1, v8u32 _2, v8u32 _3) { return __builtin_lasx_xvmaddwod_d_wu(_1, _2, _3); }
+// CHECK-LABEL: @xvmaddwod_w_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu(<8 x i32> [[_1]], <16 x i16> [[_2]], <16 x i16> [[_3]])
+// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8u32 xvmaddwod_w_hu(v8u32 _1, v16u16 _2, v16u16 _3) { return __builtin_lasx_xvmaddwod_w_hu(_1, _2, _3); }
+// CHECK-LABEL: @xvmaddwod_h_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu(<16 x i16> [[_1]], <32 x i8> [[_2]], <32 x i8> [[_3]])
+// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16u16 xvmaddwod_h_bu(v16u16 _1, v32u8 _2, v32u8 _3) { return __builtin_lasx_xvmaddwod_h_bu(_1, _2, _3); }
+// CHECK-LABEL: @xvmaddwev_q_du_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]])
+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvmaddwev_q_du_d(v4i64 _1, v4u64 _2, v4i64 _3) { return __builtin_lasx_xvmaddwev_q_du_d(_1, _2, _3); }
+// CHECK-LABEL: @xvmaddwev_d_wu_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu.w(<4 x i64> [[_1]], <8 x i32> [[_2]], <8 x i32> [[_3]])
+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvmaddwev_d_wu_w(v4i64 _1, v8u32 _2, v8i32 _3) { return __builtin_lasx_xvmaddwev_d_wu_w(_1, _2, _3); }
+// CHECK-LABEL: @xvmaddwev_w_hu_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu.h(<8 x i32> [[_1]], <16 x i16> [[_2]], <16 x i16> [[_3]])
+// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8i32 xvmaddwev_w_hu_h(v8i32 _1, v16u16 _2, v16i16 _3) { return __builtin_lasx_xvmaddwev_w_hu_h(_1, _2, _3); }
+// CHECK-LABEL: @xvmaddwev_h_bu_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu.b(<16 x i16> [[_1]], <32 x i8> [[_2]], <32 x i8> [[_3]])
+// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v16i16 xvmaddwev_h_bu_b(v16i16 _1, v32u8 _2, v32i8 _3) { return __builtin_lasx_xvmaddwev_h_bu_b(_1, _2, _3); }
+// CHECK-LABEL: @xvmaddwod_q_du_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <4 x i64>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]], <4 x i64> [[_3]])
+// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 xvmaddwod_q_du_d(v4i64 _1, v4u64 _2, v4i64 _3) { return __builtin_lasx_xvmaddwod_q_du_d(_1, _2, _3); }
+// CHECK-LABEL: @xvmaddwod_d_wu_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[_3:%.*]] = load <8 x i32>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu.w(<4 x i64> [[_1]], <8 x i32> [[_2]], <8 x i32> [[_3]]) +// CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvmaddwod_d_wu_w(v4i64 _1, v8u32 _2, v8i32 _3) { return __builtin_lasx_xvmaddwod_d_wu_w(_1, _2, _3); } +// CHECK-LABEL: @xvmaddwod_w_hu_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <16 x i16>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu.h(<8 x i32> [[_1]], <16 x i16> [[_2]], <16 x i16> [[_3]]) +// CHECK-NEXT: store <8 x i32> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvmaddwod_w_hu_h(v8i32 _1, v16u16 _2, v16i16 _3) { return __builtin_lasx_xvmaddwod_w_hu_h(_1, _2, _3); } +// CHECK-LABEL: @xvmaddwod_h_bu_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_3:%.*]] = load <32 x i8>, ptr [[TMP2:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu.b(<16 x i16> [[_1]], <32 x i8> [[_2]], <32 x i8> [[_3]]) +// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvmaddwod_h_bu_b(v16i16 _1, v32u8 _2, v32i8 _3) { return __builtin_lasx_xvmaddwod_h_bu_b(_1, _2, _3); } +// CHECK-LABEL: @xvrotr_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrotr.b(<32 x i8> [[_1]], <32 x i8> [[_2]]) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvrotr_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvrotr_b(_1, _2); } +// CHECK-LABEL: @xvrotr_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrotr.h(<16 x i16> [[_1]], <16 x i16> [[_2]]) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvrotr_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvrotr_h(_1, _2); } +// CHECK-LABEL: @xvrotr_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrotr.w(<8 x i32> [[_1]], <8 x i32> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvrotr_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvrotr_w(_1, _2); } +// CHECK-LABEL: @xvrotr_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], 
align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrotr.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvrotr_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvrotr_d(_1, _2); } +// CHECK-LABEL: @xvadd_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvadd.q(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvadd_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvadd_q(_1, _2); } +// CHECK-LABEL: @xvsub_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsub.q(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvsub_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsub_q(_1, _2); } +// CHECK-LABEL: @xvaddwev_q_du_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvaddwev_q_du_d(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvaddwev_q_du_d(_1, _2); } +// CHECK-LABEL: @xvaddwod_q_du_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvaddwod_q_du_d(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvaddwod_q_du_d(_1, _2); } +// CHECK-LABEL: @xvmulwev_q_du_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvmulwev_q_du_d(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvmulwev_q_du_d(_1, _2); } +// CHECK-LABEL: @xvmulwod_q_du_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> 
@llvm.loongarch.lasx.xvmulwod.q.du.d(<4 x i64> [[_1]], <4 x i64> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvmulwod_q_du_d(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvmulwod_q_du_d(_1, _2); } +// CHECK-LABEL: @xvmskgez_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmskgez.b(<32 x i8> [[_1]]) +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvmskgez_b(v32i8 _1) { return __builtin_lasx_xvmskgez_b(_1); } +// CHECK-LABEL: @xvmsknz_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvmsknz.b(<32 x i8> [[_1]]) +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvmsknz_b(v32i8 _1) { return __builtin_lasx_xvmsknz_b(_1); } +// CHECK-LABEL: @xvexth_h_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvexth.h.b(<32 x i8> [[_1]]) +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvexth_h_b(v32i8 _1) { return __builtin_lasx_xvexth_h_b(_1); } +// CHECK-LABEL: @xvexth_w_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvexth.w.h(<16 x i16> [[_1]]) +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvexth_w_h(v16i16 _1) { return __builtin_lasx_xvexth_w_h(_1); } +// CHECK-LABEL: @xvexth_d_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.d.w(<8 x i32> [[_1]]) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvexth_d_w(v8i32 _1) { return __builtin_lasx_xvexth_d_w(_1); } +// CHECK-LABEL: @xvexth_q_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.q.d(<4 x i64> [[_1]]) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvexth_q_d(v4i64 _1) { return __builtin_lasx_xvexth_q_d(_1); } +// CHECK-LABEL: @xvexth_hu_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvexth.hu.bu(<32 x i8> [[_1]]) +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16u16 xvexth_hu_bu(v32u8 _1) { return __builtin_lasx_xvexth_hu_bu(_1); } +// CHECK-LABEL: @xvexth_wu_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> 
@llvm.loongarch.lasx.xvexth.wu.hu(<16 x i16> [[_1]]) +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8u32 xvexth_wu_hu(v16u16 _1) { return __builtin_lasx_xvexth_wu_hu(_1); } +// CHECK-LABEL: @xvexth_du_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.du.wu(<8 x i32> [[_1]]) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4u64 xvexth_du_wu(v8u32 _1) { return __builtin_lasx_xvexth_du_wu(_1); } +// CHECK-LABEL: @xvexth_qu_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.qu.du(<4 x i64> [[_1]]) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4u64 xvexth_qu_du(v4u64 _1) { return __builtin_lasx_xvexth_qu_du(_1); } +// CHECK-LABEL: @xvrotri_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrotri.b(<32 x i8> [[_1]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvrotri_b(v32i8 _1) { return __builtin_lasx_xvrotri_b(_1, 1); } +// CHECK-LABEL: @xvrotri_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrotri.h(<16 x i16> [[_1]], i32 1) +// CHECK-NEXT: store <16 x i16> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvrotri_h(v16i16 _1) { return __builtin_lasx_xvrotri_h(_1, 1); } +// CHECK-LABEL: @xvrotri_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrotri.w(<8 x i32> [[_1]], i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvrotri_w(v8i32 _1) { return __builtin_lasx_xvrotri_w(_1, 1); } +// CHECK-LABEL: @xvrotri_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrotri.d(<4 x i64> [[_1]], i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvrotri_d(v4i64 _1) { return __builtin_lasx_xvrotri_d(_1, 1); } +// CHECK-LABEL: @xvextl_q_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvextl.q.d(<4 x i64> [[_1]]) +// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvextl_q_d(v4i64 _1) { return __builtin_lasx_xvextl_q_d(_1); } +// CHECK-LABEL: @xvsrlni_b_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa 
[[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvsrlni_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrlni_b_h(_1, _2, 1); } +// CHECK-LABEL: @xvsrlni_h_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvsrlni_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrlni_h_w(_1, _2, 1); } +// CHECK-LABEL: @xvsrlni_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvsrlni_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrlni_w_d(_1, _2, 1); } +// CHECK-LABEL: @xvsrlni_d_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvsrlni_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrlni_d_q(_1, _2, 1); } +// CHECK-LABEL: @xvsrlrni_b_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlrni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvsrlrni_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrlrni_b_h(_1, _2, 1); } +// CHECK-LABEL: @xvsrlrni_h_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlrni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvsrlrni_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrlrni_h_w(_1, _2, 1); } +// CHECK-LABEL: @xvsrlrni_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlrni.w.d(<8 x i32> [[_1]], 
<8 x i32> [[_2]], i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvsrlrni_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrlrni_w_d(_1, _2, 1); } +// CHECK-LABEL: @xvsrlrni_d_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlrni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvsrlrni_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrlrni_d_q(_1, _2, 1); } +// CHECK-LABEL: @xvssrlni_b_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvssrlni_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvssrlni_b_h(_1, _2, 1); } +// CHECK-LABEL: @xvssrlni_h_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvssrlni_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrlni_h_w(_1, _2, 1); } +// CHECK-LABEL: @xvssrlni_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvssrlni_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrlni_w_d(_1, _2, 1); } +// CHECK-LABEL: @xvssrlni_d_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvssrlni_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrlni_d_q(_1, _2, 1); } +// CHECK-LABEL: @xvssrlni_bu_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlni.bu.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, 
!tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32u8 xvssrlni_bu_h(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvssrlni_bu_h(_1, _2, 1); } +// CHECK-LABEL: @xvssrlni_hu_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlni.hu.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16u16 xvssrlni_hu_w(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvssrlni_hu_w(_1, _2, 1); } +// CHECK-LABEL: @xvssrlni_wu_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlni.wu.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8u32 xvssrlni_wu_d(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvssrlni_wu_d(_1, _2, 1); } +// CHECK-LABEL: @xvssrlni_du_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlni.du.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4u64 xvssrlni_du_q(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvssrlni_du_q(_1, _2, 1); } +// CHECK-LABEL: @xvssrlrni_b_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvssrlrni_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvssrlrni_b_h(_1, _2, 1); } +// CHECK-LABEL: @xvssrlrni_h_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvssrlrni_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrlrni_h_w(_1, _2, 1); } +// CHECK-LABEL: @xvssrlrni_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvssrlrni_w_d(v8i32 
_1, v8i32 _2) { return __builtin_lasx_xvssrlrni_w_d(_1, _2, 1); } +// CHECK-LABEL: @xvssrlrni_d_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvssrlrni_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrlrni_d_q(_1, _2, 1); } +// CHECK-LABEL: @xvssrlrni_bu_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.bu.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32u8 xvssrlrni_bu_h(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvssrlrni_bu_h(_1, _2, 1); } +// CHECK-LABEL: @xvssrlrni_hu_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.hu.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16u16 xvssrlrni_hu_w(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvssrlrni_hu_w(_1, _2, 1); } +// CHECK-LABEL: @xvssrlrni_wu_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.wu.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8u32 xvssrlrni_wu_d(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvssrlrni_wu_d(_1, _2, 1); } +// CHECK-LABEL: @xvssrlrni_du_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.du.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4u64 xvssrlrni_du_q(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvssrlrni_du_q(_1, _2, 1); } +// CHECK-LABEL: @xvsrani_b_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrani.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvsrani_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrani_b_h(_1, _2, 1); } +// 
CHECK-LABEL: @xvsrani_h_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrani.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvsrani_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrani_h_w(_1, _2, 1); } +// CHECK-LABEL: @xvsrani_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrani.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvsrani_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrani_w_d(_1, _2, 1); } +// CHECK-LABEL: @xvsrani_d_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrani.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvsrani_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrani_d_q(_1, _2, 1); } +// CHECK-LABEL: @xvsrarni_b_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrarni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvsrarni_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrarni_b_h(_1, _2, 1); } +// CHECK-LABEL: @xvsrarni_h_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrarni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvsrarni_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrarni_h_w(_1, _2, 1); } +// CHECK-LABEL: @xvsrarni_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrarni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvsrarni_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrarni_w_d(_1, _2, 1); } +// CHECK-LABEL: @xvsrarni_d_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr 
[[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrarni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvsrarni_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrarni_d_q(_1, _2, 1); } +// CHECK-LABEL: @xvssrani_b_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrani.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvssrani_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvssrani_b_h(_1, _2, 1); } +// CHECK-LABEL: @xvssrani_h_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrani.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvssrani_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrani_h_w(_1, _2, 1); } +// CHECK-LABEL: @xvssrani_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrani.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvssrani_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrani_w_d(_1, _2, 1); } +// CHECK-LABEL: @xvssrani_d_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrani.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvssrani_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrani_d_q(_1, _2, 1); } +// CHECK-LABEL: @xvssrani_bu_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrani.bu.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32u8 xvssrani_bu_h(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvssrani_bu_h(_1, _2, 1); } +// CHECK-LABEL: @xvssrani_hu_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr 
[[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrani.hu.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16u16 xvssrani_hu_w(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvssrani_hu_w(_1, _2, 1); } +// CHECK-LABEL: @xvssrani_wu_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrani.wu.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8u32 xvssrani_wu_d(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvssrani_wu_d(_1, _2, 1); } +// CHECK-LABEL: @xvssrani_du_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrani.du.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4u64 xvssrani_du_q(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvssrani_du_q(_1, _2, 1); } +// CHECK-LABEL: @xvssrarni_b_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarni.b.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvssrarni_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvssrarni_b_h(_1, _2, 1); } +// CHECK-LABEL: @xvssrarni_h_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarni.h.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvssrarni_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrarni_h_w(_1, _2, 1); } +// CHECK-LABEL: @xvssrarni_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarni.w.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvssrarni_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrarni_w_d(_1, _2, 1); } +// CHECK-LABEL: @xvssrarni_d_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail 
call <4 x i64> @llvm.loongarch.lasx.xvssrarni.d.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvssrarni_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrarni_d_q(_1, _2, 1); } +// CHECK-LABEL: @xvssrarni_bu_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <32 x i8>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarni.bu.h(<32 x i8> [[_1]], <32 x i8> [[_2]], i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32u8 xvssrarni_bu_h(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvssrarni_bu_h(_1, _2, 1); } +// CHECK-LABEL: @xvssrarni_hu_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <16 x i16>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarni.hu.w(<16 x i16> [[_1]], <16 x i16> [[_2]], i32 1) +// CHECK-NEXT: store <16 x i16> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16u16 xvssrarni_hu_w(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvssrarni_hu_w(_1, _2, 1); } +// CHECK-LABEL: @xvssrarni_wu_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x i32>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarni.wu.d(<8 x i32> [[_1]], <8 x i32> [[_2]], i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8u32 xvssrarni_wu_d(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvssrarni_wu_d(_1, _2, 1); } +// CHECK-LABEL: @xvssrarni_du_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x i64>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvssrarni.du.q(<4 x i64> [[_1]], <4 x i64> [[_2]], i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4u64 xvssrarni_du_q(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvssrarni_du_q(_1, _2, 1); } +// CHECK-LABEL: @xbnz_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.b(<32 x i8> [[_1]]) +// CHECK-NEXT: ret i32 [[TMP1]] +// +int xbnz_b(v32u8 _1) { return __builtin_lasx_xbnz_b(_1); } +// CHECK-LABEL: @xbnz_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.d(<4 x i64> [[_1]]) +// CHECK-NEXT: ret i32 [[TMP1]] +// +int xbnz_d(v4u64 _1) { return __builtin_lasx_xbnz_d(_1); } +// CHECK-LABEL: @xbnz_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.h(<16 x i16> [[_1]]) +// CHECK-NEXT: ret i32 
[[TMP1]] +// +int xbnz_h(v16u16 _1) { return __builtin_lasx_xbnz_h(_1); } +// CHECK-LABEL: @xbnz_v( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.v(<32 x i8> [[_1]]) +// CHECK-NEXT: ret i32 [[TMP1]] +// +int xbnz_v(v32u8 _1) { return __builtin_lasx_xbnz_v(_1); } +// CHECK-LABEL: @xbnz_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbnz.w(<8 x i32> [[_1]]) +// CHECK-NEXT: ret i32 [[TMP1]] +// +int xbnz_w(v8u32 _1) { return __builtin_lasx_xbnz_w(_1); } +// CHECK-LABEL: @xbz_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.b(<32 x i8> [[_1]]) +// CHECK-NEXT: ret i32 [[TMP1]] +// +int xbz_b(v32u8 _1) { return __builtin_lasx_xbz_b(_1); } +// CHECK-LABEL: @xbz_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.d(<4 x i64> [[_1]]) +// CHECK-NEXT: ret i32 [[TMP1]] +// +int xbz_d(v4u64 _1) { return __builtin_lasx_xbz_d(_1); } +// CHECK-LABEL: @xbz_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <16 x i16>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.h(<16 x i16> [[_1]]) +// CHECK-NEXT: ret i32 [[TMP1]] +// +int xbz_h(v16u16 _1) { return __builtin_lasx_xbz_h(_1); } +// CHECK-LABEL: @xbz_v( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <32 x i8>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.v(<32 x i8> [[_1]]) +// CHECK-NEXT: ret i32 [[TMP1]] +// +int xbz_v(v32u8 _1) { return __builtin_lasx_xbz_v(_1); } +// CHECK-LABEL: @xbz_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lasx.xbz.w(<8 x i32> [[_1]]) +// CHECK-NEXT: ret i32 [[TMP1]] +// +int xbz_w(v8u32 _1) { return __builtin_lasx_xbz_w(_1); } +// CHECK-LABEL: @xvfcmp_caf_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.caf.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvfcmp_caf_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_caf_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_caf_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.caf.s(<8 x float> [[_1]], <8 x float> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvfcmp_caf_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_caf_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_ceq_d( +// CHECK-NEXT: entry: +// 
CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.ceq.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvfcmp_ceq_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_ceq_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_ceq_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.ceq.s(<8 x float> [[_1]], <8 x float> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvfcmp_ceq_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_ceq_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_cle_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cle.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvfcmp_cle_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cle_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_cle_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cle.s(<8 x float> [[_1]], <8 x float> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvfcmp_cle_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cle_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_clt_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.clt.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvfcmp_clt_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_clt_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_clt_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.clt.s(<8 x float> [[_1]], <8 x float> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvfcmp_clt_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_clt_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_cne_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = 
load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cne.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvfcmp_cne_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cne_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_cne_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cne.s(<8 x float> [[_1]], <8 x float> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvfcmp_cne_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cne_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_cor_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cor.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvfcmp_cor_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cor_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_cor_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cor.s(<8 x float> [[_1]], <8 x float> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvfcmp_cor_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cor_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_cueq_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cueq.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvfcmp_cueq_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cueq_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_cueq_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cueq.s(<8 x float> [[_1]], <8 x float> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvfcmp_cueq_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cueq_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_cule_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 
x i64> @llvm.loongarch.lasx.xvfcmp.cule.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvfcmp_cule_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cule_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_cule_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cule.s(<8 x float> [[_1]], <8 x float> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvfcmp_cule_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cule_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_cult_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cult.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvfcmp_cult_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cult_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_cult_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cult.s(<8 x float> [[_1]], <8 x float> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvfcmp_cult_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cult_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_cun_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cun.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvfcmp_cun_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cun_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_cune_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cune.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvfcmp_cune_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cune_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_cune_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cune.s(<8 x float> [[_1]], <8 x float> [[_2]]) +// 
CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvfcmp_cune_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cune_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_cun_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cun.s(<8 x float> [[_1]], <8 x float> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvfcmp_cun_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cun_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_saf_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.saf.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvfcmp_saf_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_saf_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_saf_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.saf.s(<8 x float> [[_1]], <8 x float> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvfcmp_saf_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_saf_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_seq_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.seq.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvfcmp_seq_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_seq_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_seq_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.seq.s(<8 x float> [[_1]], <8 x float> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvfcmp_seq_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_seq_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_sle_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sle.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void 
+// +v4i64 xvfcmp_sle_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sle_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_sle_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sle.s(<8 x float> [[_1]], <8 x float> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvfcmp_sle_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sle_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_slt_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.slt.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvfcmp_slt_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_slt_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_slt_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.slt.s(<8 x float> [[_1]], <8 x float> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvfcmp_slt_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_slt_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_sne_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sne.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvfcmp_sne_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sne_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_sne_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sne.s(<8 x float> [[_1]], <8 x float> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvfcmp_sne_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sne_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_sor_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sor.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvfcmp_sor_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sor_d(_1, _2); } +// CHECK-LABEL: 
@xvfcmp_sor_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sor.s(<8 x float> [[_1]], <8 x float> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvfcmp_sor_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sor_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_sueq_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sueq.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvfcmp_sueq_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sueq_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_sueq_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sueq.s(<8 x float> [[_1]], <8 x float> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvfcmp_sueq_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sueq_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_sule_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sule.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvfcmp_sule_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sule_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_sule_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sule.s(<8 x float> [[_1]], <8 x float> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvfcmp_sule_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sule_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_sult_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sult.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvfcmp_sult_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sult_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_sult_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr 
[[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sult.s(<8 x float> [[_1]], <8 x float> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvfcmp_sult_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sult_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_sun_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sun.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvfcmp_sun_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sun_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_sune_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <4 x double>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sune.d(<4 x double> [[_1]], <4 x double> [[_2]]) +// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvfcmp_sune_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sune_d(_1, _2); } +// CHECK-LABEL: @xvfcmp_sune_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sune.s(<8 x float> [[_1]], <8 x float> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvfcmp_sune_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sune_s(_1, _2); } +// CHECK-LABEL: @xvfcmp_sun_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[_2:%.*]] = load <8 x float>, ptr [[TMP1:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sun.s(<8 x float> [[_1]], <8 x float> [[_2]]) +// CHECK-NEXT: store <8 x i32> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvfcmp_sun_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sun_s(_1, _2); } +// CHECK-LABEL: @xvpickve_d_f( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.xvpickve.d.f(<4 x double> [[_1]], i32 1) +// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4f64 xvpickve_d_f(v4f64 _1) { return __builtin_lasx_xvpickve_d_f(_1, 1); } +// CHECK-LABEL: @xvpickve_w_f( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.xvpickve.w.f(<8 x float> [[_1]], i32 1) +// CHECK-NEXT: store <8 x float> [[TMP1]], ptr 
[[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8f32 xvpickve_w_f(v8f32 _1) { return __builtin_lasx_xvpickve_w_f(_1, 1); } +// CHECK-LABEL: @xvrepli_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvrepli.b(i32 1) +// CHECK-NEXT: store <32 x i8> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v32i8 xvrepli_b() { return __builtin_lasx_xvrepli_b(1); } +// CHECK-LABEL: @xvrepli_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvrepli.d(i32 1) +// CHECK-NEXT: store <4 x i64> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v4i64 xvrepli_d() { return __builtin_lasx_xvrepli_d(1); } +// CHECK-LABEL: @xvrepli_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvrepli.h(i32 1) +// CHECK-NEXT: store <16 x i16> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v16i16 xvrepli_h() { return __builtin_lasx_xvrepli_h(1); } +// CHECK-LABEL: @xvrepli_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvrepli.w(i32 1) +// CHECK-NEXT: store <8 x i32> [[TMP0]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]] +// CHECK-NEXT: ret void +// +v8i32 xvrepli_w() { return __builtin_lasx_xvrepli_w(1); } diff --git a/clang/test/CodeGen/LoongArch/lasx/inline-asm-gcc-regs-error.c b/clang/test/CodeGen/LoongArch/lasx/inline-asm-gcc-regs-error.c new file mode 100644 index 0000000000000000000000000000000000000000..1fd602574b8e8d5a18dd5d6e5e2462867e121fe7 --- /dev/null +++ b/clang/test/CodeGen/LoongArch/lasx/inline-asm-gcc-regs-error.c @@ -0,0 +1,10 @@ +// RUN: not %clang_cc1 -triple loongarch64 -emit-llvm -O2 %s 2>&1 -o - | FileCheck %s + +typedef signed char v32i8 __attribute__((vector_size(32), aligned(32))); + +void test() { +// CHECK: :[[#@LINE+1]]:28: error: unknown register name 'xr0' in asm + register v32i8 p0 asm ("xr0"); +// CHECK: :[[#@LINE+1]]:29: error: unknown register name '$xr32' in asm + register v32i8 p32 asm ("$xr32"); +} diff --git a/clang/test/CodeGen/LoongArch/lasx/inline-asm-gcc-regs.c b/clang/test/CodeGen/LoongArch/lasx/inline-asm-gcc-regs.c new file mode 100644 index 0000000000000000000000000000000000000000..ed1a9660a06c9fbe04e46be2ef0e12ff0c4030af --- /dev/null +++ b/clang/test/CodeGen/LoongArch/lasx/inline-asm-gcc-regs.c @@ -0,0 +1,36 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --filter "^define |tail call" +// RUN: %clang_cc1 -triple loongarch64 -emit-llvm -O2 %s -o - | FileCheck %s + +typedef signed char v32i8 __attribute__((vector_size(32), aligned(32))); + +// CHECK-LABEL: @test_xr0( +// CHECK: tail call void asm sideeffect "", "{$xr0}"(<32 x i8> undef) #[[ATTR1:[0-9]+]], !srcloc !2 +// +void test_xr0() { + register v32i8 a asm ("$xr0"); + asm ("" :: "f"(a)); +} + +// CHECK-LABEL: @test_xr7( +// CHECK: tail call void asm sideeffect "", "{$xr7}"(<32 x i8> undef) #[[ATTR1]], !srcloc !3 +// +void test_xr7() { + register v32i8 a asm ("$xr7"); + asm ("" :: "f"(a)); +} + +// CHECK-LABEL: @test_xr15( +// CHECK: tail call void asm sideeffect "", "{$xr15}"(<32 x i8> undef) #[[ATTR1]], !srcloc !4 +// +void test_xr15() { + register v32i8 a asm ("$xr15"); + asm ("" :: "f"(a)); +} + +// CHECK-LABEL: @test_xr31( +// CHECK: tail call void asm sideeffect "", "{$xr31}"(<32 x i8> undef) #[[ATTR1]], !srcloc !5 +// +void 
test_xr31() { + register v32i8 a asm ("$xr31"); + asm ("" :: "f"(a)); +} diff --git a/clang/test/CodeGen/LoongArch/lasx/inline-asm-operand-modifier.c b/clang/test/CodeGen/LoongArch/lasx/inline-asm-operand-modifier.c new file mode 100644 index 0000000000000000000000000000000000000000..a5cc8798fd66b88032a7a59ca4ef73d7abdf256b --- /dev/null +++ b/clang/test/CodeGen/LoongArch/lasx/inline-asm-operand-modifier.c @@ -0,0 +1,15 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2 +// RUN: %clang_cc1 -triple loongarch64 -emit-llvm -O2 %s -o - | FileCheck %s + +typedef long long v4i64 __attribute__ ((vector_size(32), aligned(32))); + +// CHECK-LABEL: define dso_local void @test_u +// CHECK-SAME: () local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> asm sideeffect "xvldi ${0:u}, 1", "=f"() #[[ATTR1:[0-9]+]], !srcloc !2 +// CHECK-NEXT: ret void +// +void test_u() { + v4i64 v4i64_r; + asm volatile ("xvldi %u0, 1" : "=f" (v4i64_r)); +} diff --git a/clang/test/CodeGen/LoongArch/lsx/builtin-alias-error.c b/clang/test/CodeGen/LoongArch/lsx/builtin-alias-error.c new file mode 100644 index 0000000000000000000000000000000000000000..69cf2254fdd797944bc2333d4c01a4d62c2c45b7 --- /dev/null +++ b/clang/test/CodeGen/LoongArch/lsx/builtin-alias-error.c @@ -0,0 +1,1359 @@ +// RUN: %clang_cc1 -triple loongarch64 -target-feature +lsx -verify %s + +#include <lsxintrin.h> + +v16i8 vslli_b(v16i8 _1, int var) { + v16i8 res = __lsx_vslli_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __lsx_vslli_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __lsx_vslli_b(_1, var); // expected-error {{argument to '__builtin_lsx_vslli_b' must be a constant integer}} + return res; +} + +v8i16 vslli_h(v8i16 _1, int var) { + v8i16 res = __lsx_vslli_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lsx_vslli_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lsx_vslli_h(_1, var); // expected-error {{argument to '__builtin_lsx_vslli_h' must be a constant integer}} + return res; +} + +v4i32 vslli_w(v4i32 _1, int var) { + v4i32 res = __lsx_vslli_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vslli_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vslli_w(_1, var); // expected-error {{argument to '__builtin_lsx_vslli_w' must be a constant integer}} + return res; +} + +v2i64 vslli_d(v2i64 _1, int var) { + v2i64 res = __lsx_vslli_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __lsx_vslli_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __lsx_vslli_d(_1, var); // expected-error {{argument to '__builtin_lsx_vslli_d' must be a constant integer}} + return res; +} + +v16i8 vsrai_b(v16i8 _1, int var) { + v16i8 res = __lsx_vsrai_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __lsx_vsrai_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __lsx_vsrai_b(_1, var); // expected-error {{argument to '__builtin_lsx_vsrai_b' must be a constant integer}} + return res; +} + +v8i16 vsrai_h(v8i16 _1, int var) { + v8i16 res = __lsx_vsrai_h(_1, -1); // expected-error
{{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lsx_vsrai_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lsx_vsrai_h(_1, var); // expected-error {{argument to '__builtin_lsx_vsrai_h' must be a constant integer}} + return res; +} + +v4i32 vsrai_w(v4i32 _1, int var) { + v4i32 res = __lsx_vsrai_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vsrai_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vsrai_w(_1, var); // expected-error {{argument to '__builtin_lsx_vsrai_w' must be a constant integer}} + return res; +} + +v2i64 vsrai_d(v2i64 _1, int var) { + v2i64 res = __lsx_vsrai_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __lsx_vsrai_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __lsx_vsrai_d(_1, var); // expected-error {{argument to '__builtin_lsx_vsrai_d' must be a constant integer}} + return res; +} + +v16i8 vsrari_b(v16i8 _1, int var) { + v16i8 res = __lsx_vsrari_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __lsx_vsrari_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __lsx_vsrari_b(_1, var); // expected-error {{argument to '__builtin_lsx_vsrari_b' must be a constant integer}} + return res; +} + +v8i16 vsrari_h(v8i16 _1, int var) { + v8i16 res = __lsx_vsrari_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lsx_vsrari_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lsx_vsrari_h(_1, var); // expected-error {{argument to '__builtin_lsx_vsrari_h' must be a constant integer}} + return res; +} + +v4i32 vsrari_w(v4i32 _1, int var) { + v4i32 res = __lsx_vsrari_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vsrari_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vsrari_w(_1, var); // expected-error {{argument to '__builtin_lsx_vsrari_w' must be a constant integer}} + return res; +} + +v2i64 vsrari_d(v2i64 _1, int var) { + v2i64 res = __lsx_vsrari_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __lsx_vsrari_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __lsx_vsrari_d(_1, var); // expected-error {{argument to '__builtin_lsx_vsrari_d' must be a constant integer}} + return res; +} + +v16i8 vsrli_b(v16i8 _1, int var) { + v16i8 res = __lsx_vsrli_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __lsx_vsrli_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __lsx_vsrli_b(_1, var); // expected-error {{argument to '__builtin_lsx_vsrli_b' must be a constant integer}} + return res; +} + +v8i16 vsrli_h(v8i16 _1, int var) { + v8i16 res = __lsx_vsrli_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lsx_vsrli_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lsx_vsrli_h(_1, var); // expected-error {{argument to '__builtin_lsx_vsrli_h' must be a constant integer}} + return res; +} + +v4i32 vsrli_w(v4i32 _1, int var) 
{ + v4i32 res = __lsx_vsrli_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vsrli_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vsrli_w(_1, var); // expected-error {{argument to '__builtin_lsx_vsrli_w' must be a constant integer}} + return res; +} + +v2i64 vsrli_d(v2i64 _1, int var) { + v2i64 res = __lsx_vsrli_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __lsx_vsrli_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __lsx_vsrli_d(_1, var); // expected-error {{argument to '__builtin_lsx_vsrli_d' must be a constant integer}} + return res; +} + +v16i8 vsrlri_b(v16i8 _1, int var) { + v16i8 res = __lsx_vsrlri_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __lsx_vsrlri_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __lsx_vsrlri_b(_1, var); // expected-error {{argument to '__builtin_lsx_vsrlri_b' must be a constant integer}} + return res; +} + +v8i16 vsrlri_h(v8i16 _1, int var) { + v8i16 res = __lsx_vsrlri_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lsx_vsrlri_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lsx_vsrlri_h(_1, var); // expected-error {{argument to '__builtin_lsx_vsrlri_h' must be a constant integer}} + return res; +} + +v4i32 vsrlri_w(v4i32 _1, int var) { + v4i32 res = __lsx_vsrlri_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vsrlri_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vsrlri_w(_1, var); // expected-error {{argument to '__builtin_lsx_vsrlri_w' must be a constant integer}} + return res; +} + +v2i64 vsrlri_d(v2i64 _1, int var) { + v2i64 res = __lsx_vsrlri_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __lsx_vsrlri_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __lsx_vsrlri_d(_1, var); // expected-error {{argument to '__builtin_lsx_vsrlri_d' must be a constant integer}} + return res; +} + +v16u8 vbitclri_b(v16u8 _1, int var) { + v16u8 res = __lsx_vbitclri_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __lsx_vbitclri_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __lsx_vbitclri_b(_1, var); // expected-error {{argument to '__builtin_lsx_vbitclri_b' must be a constant integer}} + return res; +} + +v8u16 vbitclri_h(v8u16 _1, int var) { + v8u16 res = __lsx_vbitclri_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lsx_vbitclri_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lsx_vbitclri_h(_1, var); // expected-error {{argument to '__builtin_lsx_vbitclri_h' must be a constant integer}} + return res; +} + +v4u32 vbitclri_w(v4u32 _1, int var) { + v4u32 res = __lsx_vbitclri_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vbitclri_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vbitclri_w(_1, var); // expected-error {{argument to 
'__builtin_lsx_vbitclri_w' must be a constant integer}} + return res; +} + +v2u64 vbitclri_d(v2u64 _1, int var) { + v2u64 res = __lsx_vbitclri_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __lsx_vbitclri_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __lsx_vbitclri_d(_1, var); // expected-error {{argument to '__builtin_lsx_vbitclri_d' must be a constant integer}} + return res; +} + +v16u8 vbitseti_b(v16u8 _1, int var) { + v16u8 res = __lsx_vbitseti_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __lsx_vbitseti_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __lsx_vbitseti_b(_1, var); // expected-error {{argument to '__builtin_lsx_vbitseti_b' must be a constant integer}} + return res; +} + +v8u16 vbitseti_h(v8u16 _1, int var) { + v8u16 res = __lsx_vbitseti_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lsx_vbitseti_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lsx_vbitseti_h(_1, var); // expected-error {{argument to '__builtin_lsx_vbitseti_h' must be a constant integer}} + return res; +} + +v4u32 vbitseti_w(v4u32 _1, int var) { + v4u32 res = __lsx_vbitseti_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vbitseti_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vbitseti_w(_1, var); // expected-error {{argument to '__builtin_lsx_vbitseti_w' must be a constant integer}} + return res; +} + +v2u64 vbitseti_d(v2u64 _1, int var) { + v2u64 res = __lsx_vbitseti_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __lsx_vbitseti_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __lsx_vbitseti_d(_1, var); // expected-error {{argument to '__builtin_lsx_vbitseti_d' must be a constant integer}} + return res; +} + +v16u8 vbitrevi_b(v16u8 _1, int var) { + v16u8 res = __lsx_vbitrevi_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __lsx_vbitrevi_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __lsx_vbitrevi_b(_1, var); // expected-error {{argument to '__builtin_lsx_vbitrevi_b' must be a constant integer}} + return res; +} + +v8u16 vbitrevi_h(v8u16 _1, int var) { + v8u16 res = __lsx_vbitrevi_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lsx_vbitrevi_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lsx_vbitrevi_h(_1, var); // expected-error {{argument to '__builtin_lsx_vbitrevi_h' must be a constant integer}} + return res; +} + +v4u32 vbitrevi_w(v4u32 _1, int var) { + v4u32 res = __lsx_vbitrevi_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vbitrevi_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vbitrevi_w(_1, var); // expected-error {{argument to '__builtin_lsx_vbitrevi_w' must be a constant integer}} + return res; +} + +v2u64 vbitrevi_d(v2u64 _1, int var) { + v2u64 res = __lsx_vbitrevi_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res 
|= __lsx_vbitrevi_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __lsx_vbitrevi_d(_1, var); // expected-error {{argument to '__builtin_lsx_vbitrevi_d' must be a constant integer}} + return res; +} + +v16i8 vaddi_bu(v16i8 _1, int var) { + v16i8 res = __lsx_vaddi_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vaddi_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vaddi_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vaddi_bu' must be a constant integer}} + return res; +} + +v8i16 vaddi_hu(v8i16 _1, int var) { + v8i16 res = __lsx_vaddi_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vaddi_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vaddi_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vaddi_hu' must be a constant integer}} + return res; +} + +v4i32 vaddi_wu(v4i32 _1, int var) { + v4i32 res = __lsx_vaddi_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vaddi_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vaddi_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vaddi_wu' must be a constant integer}} + return res; +} + +v2i64 vaddi_du(v2i64 _1, int var) { + v2i64 res = __lsx_vaddi_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vaddi_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vaddi_du(_1, var); // expected-error {{argument to '__builtin_lsx_vaddi_du' must be a constant integer}} + return res; +} + +v16i8 vsubi_bu(v16i8 _1, int var) { + v16i8 res = __lsx_vsubi_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vsubi_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vsubi_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vsubi_bu' must be a constant integer}} + return res; +} + +v8i16 vsubi_hu(v8i16 _1, int var) { + v8i16 res = __lsx_vsubi_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vsubi_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vsubi_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vsubi_hu' must be a constant integer}} + return res; +} + +v4i32 vsubi_wu(v4i32 _1, int var) { + v4i32 res = __lsx_vsubi_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vsubi_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vsubi_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vsubi_wu' must be a constant integer}} + return res; +} + +v2i64 vsubi_du(v2i64 _1, int var) { + v2i64 res = __lsx_vsubi_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vsubi_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vsubi_du(_1, var); // expected-error {{argument to '__builtin_lsx_vsubi_du' must be a constant integer}} + return res; +} + +v16i8 vmaxi_b(v16i8 _1, int var) { + v16i8 res = __lsx_vmaxi_b(_1, 
-17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __lsx_vmaxi_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lsx_vmaxi_b(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_b' must be a constant integer}} + return res; +} + +v8i16 vmaxi_h(v8i16 _1, int var) { + v8i16 res = __lsx_vmaxi_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __lsx_vmaxi_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lsx_vmaxi_h(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_h' must be a constant integer}} + return res; +} + +v4i32 vmaxi_w(v4i32 _1, int var) { + v4i32 res = __lsx_vmaxi_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __lsx_vmaxi_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lsx_vmaxi_w(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_w' must be a constant integer}} + return res; +} + +v2i64 vmaxi_d(v2i64 _1, int var) { + v2i64 res = __lsx_vmaxi_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __lsx_vmaxi_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lsx_vmaxi_d(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_d' must be a constant integer}} + return res; +} + +v16u8 vmaxi_bu(v16u8 _1, int var) { + v16u8 res = __lsx_vmaxi_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vmaxi_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vmaxi_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_bu' must be a constant integer}} + return res; +} + +v8u16 vmaxi_hu(v8u16 _1, int var) { + v8u16 res = __lsx_vmaxi_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vmaxi_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vmaxi_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_hu' must be a constant integer}} + return res; +} + +v4u32 vmaxi_wu(v4u32 _1, int var) { + v4u32 res = __lsx_vmaxi_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vmaxi_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vmaxi_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_wu' must be a constant integer}} + return res; +} + +v2u64 vmaxi_du(v2u64 _1, int var) { + v2u64 res = __lsx_vmaxi_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vmaxi_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vmaxi_du(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_du' must be a constant integer}} + return res; +} + +v16i8 vmini_b(v16i8 _1, int var) { + v16i8 res = __lsx_vmini_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __lsx_vmini_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lsx_vmini_b(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_b' must be a constant integer}} + return res; +} + +v8i16 
vmini_h(v8i16 _1, int var) { + v8i16 res = __lsx_vmini_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __lsx_vmini_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lsx_vmini_h(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_h' must be a constant integer}} + return res; +} + +v4i32 vmini_w(v4i32 _1, int var) { + v4i32 res = __lsx_vmini_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __lsx_vmini_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lsx_vmini_w(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_w' must be a constant integer}} + return res; +} + +v2i64 vmini_d(v2i64 _1, int var) { + v2i64 res = __lsx_vmini_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __lsx_vmini_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lsx_vmini_d(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_d' must be a constant integer}} + return res; +} + +v16u8 vmini_bu(v16u8 _1, int var) { + v16u8 res = __lsx_vmini_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vmini_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vmini_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_bu' must be a constant integer}} + return res; +} + +v8u16 vmini_hu(v8u16 _1, int var) { + v8u16 res = __lsx_vmini_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vmini_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vmini_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_hu' must be a constant integer}} + return res; +} + +v4u32 vmini_wu(v4u32 _1, int var) { + v4u32 res = __lsx_vmini_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vmini_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vmini_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_wu' must be a constant integer}} + return res; +} + +v2u64 vmini_du(v2u64 _1, int var) { + v2u64 res = __lsx_vmini_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vmini_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vmini_du(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_du' must be a constant integer}} + return res; +} + +v16i8 vseqi_b(v16i8 _1, int var) { + v16i8 res = __lsx_vseqi_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __lsx_vseqi_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lsx_vseqi_b(_1, var); // expected-error {{argument to '__builtin_lsx_vseqi_b' must be a constant integer}} + return res; +} + +v8i16 vseqi_h(v8i16 _1, int var) { + v8i16 res = __lsx_vseqi_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __lsx_vseqi_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lsx_vseqi_h(_1, var); // expected-error {{argument to
'__builtin_lsx_vseqi_h' must be a constant integer}} + return res; +} + +v4i32 vseqi_w(v4i32 _1, int var) { + v4i32 res = __lsx_vseqi_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __lsx_vseqi_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lsx_vseqi_w(_1, var); // expected-error {{argument to '__builtin_lsx_vseqi_w' must be a constant integer}} + return res; +} + +v2i64 vseqi_d(v2i64 _1, int var) { + v2i64 res = __lsx_vseqi_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __lsx_vseqi_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lsx_vseqi_d(_1, var); // expected-error {{argument to '__builtin_lsx_vseqi_d' must be a constant integer}} + return res; +} + +v16i8 vslti_b(v16i8 _1, int var) { + v16i8 res = __lsx_vslti_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __lsx_vslti_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lsx_vslti_b(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_b' must be a constant integer}} + return res; +} + +v8i16 vslti_h(v8i16 _1, int var) { + v8i16 res = __lsx_vslti_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __lsx_vslti_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lsx_vslti_h(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_h' must be a constant integer}} + return res; +} + +v4i32 vslti_w(v4i32 _1, int var) { + v4i32 res = __lsx_vslti_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __lsx_vslti_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lsx_vslti_w(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_w' must be a constant integer}} + return res; +} + +v2i64 vslti_d(v2i64 _1, int var) { + v2i64 res = __lsx_vslti_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __lsx_vslti_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lsx_vslti_d(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_d' must be a constant integer}} + return res; +} + +v16i8 vslti_bu(v16u8 _1, int var) { + v16i8 res = __lsx_vslti_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vslti_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vslti_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_bu' must be a constant integer}} + return res; +} + +v8i16 vslti_hu(v8u16 _1, int var) { + v8i16 res = __lsx_vslti_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vslti_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vslti_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_hu' must be a constant integer}} + return res; +} + +v4i32 vslti_wu(v4u32 _1, int var) { + v4i32 res = __lsx_vslti_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vslti_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= 
__lsx_vslti_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_wu' must be a constant integer}} + return res; +} + +v2i64 vslti_du(v2u64 _1, int var) { + v2i64 res = __lsx_vslti_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vslti_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vslti_du(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_du' must be a constant integer}} + return res; +} + +v16i8 vslei_b(v16i8 _1, int var) { + v16i8 res = __lsx_vslei_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __lsx_vslei_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lsx_vslei_b(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_b' must be a constant integer}} + return res; +} + +v8i16 vslei_h(v8i16 _1, int var) { + v8i16 res = __lsx_vslei_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __lsx_vslei_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lsx_vslei_h(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_h' must be a constant integer}} + return res; +} + +v4i32 vslei_w(v4i32 _1, int var) { + v4i32 res = __lsx_vslei_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __lsx_vslei_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lsx_vslei_w(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_w' must be a constant integer}} + return res; +} + +v2i64 vslei_d(v2i64 _1, int var) { + v2i64 res = __lsx_vslei_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __lsx_vslei_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __lsx_vslei_d(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_d' must be a constant integer}} + return res; +} + +v16i8 vslei_bu(v16u8 _1, int var) { + v16i8 res = __lsx_vslei_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vslei_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vslei_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_bu' must be a constant integer}} + return res; +} + +v8i16 vslei_hu(v8u16 _1, int var) { + v8i16 res = __lsx_vslei_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vslei_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vslei_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_hu' must be a constant integer}} + return res; +} + +v4i32 vslei_wu(v4u32 _1, int var) { + v4i32 res = __lsx_vslei_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vslei_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vslei_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_wu' must be a constant integer}} + return res; +} + +v2i64 vslei_du(v2u64 _1, int var) { + v2i64 res = __lsx_vslei_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vslei_du(_1, 32); // expected-error 
{{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vslei_du(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_du' must be a constant integer}} + return res; +} + +v16i8 vsat_b(v16i8 _1, int var) { + v16i8 res = __lsx_vsat_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __lsx_vsat_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __lsx_vsat_b(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_b' must be a constant integer}} + return res; +} + +v8i16 vsat_h(v8i16 _1, int var) { + v8i16 res = __lsx_vsat_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lsx_vsat_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lsx_vsat_h(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_h' must be a constant integer}} + return res; +} + +v4i32 vsat_w(v4i32 _1, int var) { + v4i32 res = __lsx_vsat_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vsat_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vsat_w(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_w' must be a constant integer}} + return res; +} + +v2i64 vsat_d(v2i64 _1, int var) { + v2i64 res = __lsx_vsat_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __lsx_vsat_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __lsx_vsat_d(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_d' must be a constant integer}} + return res; +} + +v16u8 vsat_bu(v16u8 _1, int var) { + v16u8 res = __lsx_vsat_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __lsx_vsat_bu(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __lsx_vsat_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_bu' must be a constant integer}} + return res; +} + +v8u16 vsat_hu(v8u16 _1, int var) { + v8u16 res = __lsx_vsat_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lsx_vsat_hu(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lsx_vsat_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_hu' must be a constant integer}} + return res; +} + +v4u32 vsat_wu(v4u32 _1, int var) { + v4u32 res = __lsx_vsat_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vsat_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vsat_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_wu' must be a constant integer}} + return res; +} + +v2u64 vsat_du(v2u64 _1, int var) { + v2u64 res = __lsx_vsat_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __lsx_vsat_du(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __lsx_vsat_du(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_du' must be a constant integer}} + return res; +} + +v16i8 vreplvei_b(v16i8 _1, int var) { + v16i8 res = __lsx_vreplvei_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= 
__lsx_vreplvei_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lsx_vreplvei_b(_1, var); // expected-error {{argument to '__builtin_lsx_vreplvei_b' must be a constant integer}} + return res; +} + +v8i16 vreplvei_h(v8i16 _1, int var) { + v8i16 res = __lsx_vreplvei_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __lsx_vreplvei_h(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __lsx_vreplvei_h(_1, var); // expected-error {{argument to '__builtin_lsx_vreplvei_h' must be a constant integer}} + return res; +} + +v4i32 vreplvei_w(v4i32 _1, int var) { + v4i32 res = __lsx_vreplvei_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} + res |= __lsx_vreplvei_w(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + res |= __lsx_vreplvei_w(_1, var); // expected-error {{argument to '__builtin_lsx_vreplvei_w' must be a constant integer}} + return res; +} + +v2i64 vreplvei_d(v2i64 _1, int var) { + v2i64 res = __lsx_vreplvei_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 1]}} + res |= __lsx_vreplvei_d(_1, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} + res |= __lsx_vreplvei_d(_1, var); // expected-error {{argument to '__builtin_lsx_vreplvei_d' must be a constant integer}} + return res; +} + +v16u8 vandi_b(v16u8 _1, int var) { + v16u8 res = __lsx_vandi_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __lsx_vandi_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __lsx_vandi_b(_1, var); // expected-error {{argument to '__builtin_lsx_vandi_b' must be a constant integer}} + return res; +} + +v16u8 vori_b(v16u8 _1, int var) { + v16u8 res = __lsx_vori_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __lsx_vori_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __lsx_vori_b(_1, var); // expected-error {{argument to '__builtin_lsx_vori_b' must be a constant integer}} + return res; +} + +v16u8 vnori_b(v16u8 _1, int var) { + v16u8 res = __lsx_vnori_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __lsx_vnori_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __lsx_vnori_b(_1, var); // expected-error {{argument to '__builtin_lsx_vnori_b' must be a constant integer}} + return res; +} + +v16u8 vxori_b(v16u8 _1, int var) { + v16u8 res = __lsx_vxori_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __lsx_vxori_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __lsx_vxori_b(_1, var); // expected-error {{argument to '__builtin_lsx_vxori_b' must be a constant integer}} + return res; +} + +v16u8 vbitseli_b(v16u8 _1, v16u8 _2, int var) { + v16u8 res = __lsx_vbitseli_b(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __lsx_vbitseli_b(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __lsx_vbitseli_b(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vbitseli_b' must be a constant integer}} + return res; +} + +v16i8 vshuf4i_b(v16i8 
_1, int var) { + v16i8 res = __lsx_vshuf4i_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __lsx_vshuf4i_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __lsx_vshuf4i_b(_1, var); // expected-error {{argument to '__builtin_lsx_vshuf4i_b' must be a constant integer}} + return res; +} + +v8i16 vshuf4i_h(v8i16 _1, int var) { + v8i16 res = __lsx_vshuf4i_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __lsx_vshuf4i_h(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __lsx_vshuf4i_h(_1, var); // expected-error {{argument to '__builtin_lsx_vshuf4i_h' must be a constant integer}} + return res; +} + +v4i32 vshuf4i_w(v4i32 _1, int var) { + v4i32 res = __lsx_vshuf4i_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __lsx_vshuf4i_w(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __lsx_vshuf4i_w(_1, var); // expected-error {{argument to '__builtin_lsx_vshuf4i_w' must be a constant integer}} + return res; +} + +int vpickve2gr_b(v16i8 _1, int var) { + int res = __lsx_vpickve2gr_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lsx_vpickve2gr_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lsx_vpickve2gr_b(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_b' must be a constant integer}} + return res; +} + +int vpickve2gr_h(v8i16 _1, int var) { + int res = __lsx_vpickve2gr_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __lsx_vpickve2gr_h(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __lsx_vpickve2gr_h(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_h' must be a constant integer}} + return res; +} + +int vpickve2gr_w(v4i32 _1, int var) { + int res = __lsx_vpickve2gr_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} + res |= __lsx_vpickve2gr_w(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + res |= __lsx_vpickve2gr_w(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_w' must be a constant integer}} + return res; +} + +long vpickve2gr_d(v2i64 _1, int var) { + long res = __lsx_vpickve2gr_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 1]}} + res |= __lsx_vpickve2gr_d(_1, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} + res |= __lsx_vpickve2gr_d(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_d' must be a constant integer}} + return res; +} + +unsigned int vpickve2gr_bu(v16i8 _1, int var) { + unsigned int res = __lsx_vpickve2gr_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lsx_vpickve2gr_bu(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lsx_vpickve2gr_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_bu' must be a constant integer}} + return res; +} + +unsigned int vpickve2gr_hu(v8i16 _1, int var) { + unsigned int res = __lsx_vpickve2gr_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __lsx_vpickve2gr_hu(_1, 8); 
// expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __lsx_vpickve2gr_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_hu' must be a constant integer}} + return res; +} + +unsigned int vpickve2gr_wu(v4i32 _1, int var) { + unsigned int res = __lsx_vpickve2gr_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} + res |= __lsx_vpickve2gr_wu(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + res |= __lsx_vpickve2gr_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_wu' must be a constant integer}} + return res; +} + +unsigned long int vpickve2gr_du(v2i64 _1, int var) { + unsigned long int res = __lsx_vpickve2gr_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 1]}} + res |= __lsx_vpickve2gr_du(_1, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} + res |= __lsx_vpickve2gr_du(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_du' must be a constant integer}} + return res; +} + +v16i8 vinsgr2vr_b(v16i8 _1, int var) { + v16i8 res = __lsx_vinsgr2vr_b(_1, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lsx_vinsgr2vr_b(_1, 1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lsx_vinsgr2vr_b(_1, 1, var); // expected-error {{argument to '__builtin_lsx_vinsgr2vr_b' must be a constant integer}} + return res; +} + +v8i16 vinsgr2vr_h(v8i16 _1, int var) { + v8i16 res = __lsx_vinsgr2vr_h(_1, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __lsx_vinsgr2vr_h(_1, 1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __lsx_vinsgr2vr_h(_1, 1, var); // expected-error {{argument to '__builtin_lsx_vinsgr2vr_h' must be a constant integer}} + return res; +} + +v4i32 vinsgr2vr_w(v4i32 _1, int var) { + v4i32 res = __lsx_vinsgr2vr_w(_1, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} + res |= __lsx_vinsgr2vr_w(_1, 1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + res |= __lsx_vinsgr2vr_w(_1, 1, var); // expected-error {{argument to '__builtin_lsx_vinsgr2vr_w' must be a constant integer}} + return res; +} + +v2i64 vinsgr2vr_d(v2i64 _1, int var) { + v2i64 res = __lsx_vinsgr2vr_d(_1, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 1]}} + res |= __lsx_vinsgr2vr_d(_1, 1, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} + res |= __lsx_vinsgr2vr_d(_1, 1, var); // expected-error {{argument to '__builtin_lsx_vinsgr2vr_d' must be a constant integer}} + return res; +} + +v8i16 vsllwil_h_b(v16i8 _1, int var) { + v8i16 res = __lsx_vsllwil_h_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __lsx_vsllwil_h_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __lsx_vsllwil_h_b(_1, var); // expected-error {{argument to '__builtin_lsx_vsllwil_h_b' must be a constant integer}} + return res; +} + +v4i32 vsllwil_w_h(v8i16 _1, int var) { + v4i32 res = __lsx_vsllwil_w_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lsx_vsllwil_w_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lsx_vsllwil_w_h(_1, var); // 
expected-error {{argument to '__builtin_lsx_vsllwil_w_h' must be a constant integer}} + return res; +} + +v2i64 vsllwil_d_w(v4i32 _1, int var) { + v2i64 res = __lsx_vsllwil_d_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vsllwil_d_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vsllwil_d_w(_1, var); // expected-error {{argument to '__builtin_lsx_vsllwil_d_w' must be a constant integer}} + return res; +} + +v8u16 vsllwil_hu_bu(v16u8 _1, int var) { + v8u16 res = __lsx_vsllwil_hu_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __lsx_vsllwil_hu_bu(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __lsx_vsllwil_hu_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vsllwil_hu_bu' must be a constant integer}} + return res; +} + +v4u32 vsllwil_wu_hu(v8u16 _1, int var) { + v4u32 res = __lsx_vsllwil_wu_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lsx_vsllwil_wu_hu(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lsx_vsllwil_wu_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vsllwil_wu_hu' must be a constant integer}} + return res; +} + +v2u64 vsllwil_du_wu(v4u32 _1, int var) { + v2u64 res = __lsx_vsllwil_du_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vsllwil_du_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vsllwil_du_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vsllwil_du_wu' must be a constant integer}} + return res; +} + +v16i8 vfrstpi_b(v16i8 _1, v16i8 _2, int var) { + v16i8 res = __lsx_vfrstpi_b(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vfrstpi_b(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vfrstpi_b(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vfrstpi_b' must be a constant integer}} + return res; +} + +v8i16 vfrstpi_h(v8i16 _1, v8i16 _2, int var) { + v8i16 res = __lsx_vfrstpi_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vfrstpi_h(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vfrstpi_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vfrstpi_h' must be a constant integer}} + return res; +} + +v2i64 vshuf4i_d(v2i64 _1, v2i64 _2, int var) { + v2i64 res = __lsx_vshuf4i_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __lsx_vshuf4i_d(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __lsx_vshuf4i_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vshuf4i_d' must be a constant integer}} + return res; +} + +v16i8 vbsrl_v(v16i8 _1, int var) { + v16i8 res = __lsx_vbsrl_v(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vbsrl_v(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vbsrl_v(_1, var); // expected-error {{argument to '__builtin_lsx_vbsrl_v' must be a constant integer}} + return res; +} + +v16i8 vbsll_v(v16i8 _1, int var) { + v16i8 
res = __lsx_vbsll_v(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vbsll_v(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vbsll_v(_1, var); // expected-error {{argument to '__builtin_lsx_vbsll_v' must be a constant integer}} + return res; +} + +v16i8 vextrins_b(v16i8 _1, v16i8 _2, int var) { + v16i8 res = __lsx_vextrins_b(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __lsx_vextrins_b(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __lsx_vextrins_b(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vextrins_b' must be a constant integer}} + return res; +} + +v8i16 vextrins_h(v8i16 _1, v8i16 _2, int var) { + v8i16 res = __lsx_vextrins_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __lsx_vextrins_h(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __lsx_vextrins_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vextrins_h' must be a constant integer}} + return res; +} + +v4i32 vextrins_w(v4i32 _1, v4i32 _2, int var) { + v4i32 res = __lsx_vextrins_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __lsx_vextrins_w(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __lsx_vextrins_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vextrins_w' must be a constant integer}} + return res; +} + +v2i64 vextrins_d(v2i64 _1, v2i64 _2, int var) { + v2i64 res = __lsx_vextrins_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __lsx_vextrins_d(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __lsx_vextrins_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vextrins_d' must be a constant integer}} + return res; +} + +void vstelm_b_idx(v16i8 _1, void *_2, int var) { + __lsx_vstelm_b(_1, _2, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + __lsx_vstelm_b(_1, _2, 1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + __lsx_vstelm_b(_1, _2, 1, var); // expected-error {{argument to '__builtin_lsx_vstelm_b' must be a constant integer}} +} + +void vstelm_h_idx(v8i16 _1, void *_2, int var) { + __lsx_vstelm_h(_1, _2, 2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + __lsx_vstelm_h(_1, _2, 2, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + __lsx_vstelm_h(_1, _2, 2, var); // expected-error {{argument to '__builtin_lsx_vstelm_h' must be a constant integer}} +} + +void vstelm_w_idx(v4i32 _1, void *_2, int var) { + __lsx_vstelm_w(_1, _2, 4, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} + __lsx_vstelm_w(_1, _2, 4, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + __lsx_vstelm_w(_1, _2, 4, var); // expected-error {{argument to '__builtin_lsx_vstelm_w' must be a constant integer}} +} + +void vstelm_d_idx(v2i64 _1, void *_2, int var) { + __lsx_vstelm_d(_1, _2, 8, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 1]}} + __lsx_vstelm_d(_1, _2, 8, 2); // expected-error {{argument value 2 is outside 
the valid range [0, 1]}} + __lsx_vstelm_d(_1, _2, 8, var); // expected-error {{argument to '__builtin_lsx_vstelm_d' must be a constant integer}} +} + +void vstelm_b(v16i8 _1, void *_2, int var) { + __lsx_vstelm_b(_1, _2, -129, 1); // expected-error {{argument value -129 is outside the valid range [-128, 127]}} + __lsx_vstelm_b(_1, _2, 128, 1); // expected-error {{argument value 128 is outside the valid range [-128, 127]}} + __lsx_vstelm_b(_1, _2, var, 1); // expected-error {{argument to '__builtin_lsx_vstelm_b' must be a constant integer}} +} + +void vstelm_h(v8i16 _1, void *_2, int var) { + __lsx_vstelm_h(_1, _2, -258, 1); // expected-error {{argument value -258 is outside the valid range [-256, 254]}} + __lsx_vstelm_h(_1, _2, 256, 1); // expected-error {{argument value 256 is outside the valid range [-256, 254]}} + __lsx_vstelm_h(_1, _2, var, 1); // expected-error {{argument to '__builtin_lsx_vstelm_h' must be a constant integer}} +} + +void vstelm_w(v4i32 _1, void *_2, int var) { + __lsx_vstelm_w(_1, _2, -516, 1); // expected-error {{argument value -516 is outside the valid range [-512, 508]}} + __lsx_vstelm_w(_1, _2, 512, 1); // expected-error {{argument value 512 is outside the valid range [-512, 508]}} + __lsx_vstelm_w(_1, _2, var, 1); // expected-error {{argument to '__builtin_lsx_vstelm_w' must be a constant integer}} +} + +void vstelm_d(v2i64 _1, void *_2, int var) { + __lsx_vstelm_d(_1, _2, -1032, 1); // expected-error {{argument value -1032 is outside the valid range [-1024, 1016]}} + __lsx_vstelm_d(_1, _2, 1024, 1); // expected-error {{argument value 1024 is outside the valid range [-1024, 1016]}} + __lsx_vstelm_d(_1, _2, var, 1); // expected-error {{argument to '__builtin_lsx_vstelm_d' must be a constant integer}} +} + +v16i8 vldrepl_b(void *_1, int var) { + v16i8 res = __lsx_vldrepl_b(_1, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} + res |= __lsx_vldrepl_b(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2047]}} + res |= __lsx_vldrepl_b(_1, var); // expected-error {{argument to '__builtin_lsx_vldrepl_b' must be a constant integer}} + return res; +} + +v8i16 vldrepl_h(void *_1, int var) { + v8i16 res = __lsx_vldrepl_h(_1, -2050); // expected-error {{argument value -2050 is outside the valid range [-2048, 2046]}} + res |= __lsx_vldrepl_h(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2046]}} + res |= __lsx_vldrepl_h(_1, var); // expected-error {{argument to '__builtin_lsx_vldrepl_h' must be a constant integer}} + return res; +} + +v4i32 vldrepl_w(void *_1, int var) { + v4i32 res = __lsx_vldrepl_w(_1, -2052); // expected-error {{argument value -2052 is outside the valid range [-2048, 2044]}} + res |= __lsx_vldrepl_w(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2044]}} + res |= __lsx_vldrepl_w(_1, var); // expected-error {{argument to '__builtin_lsx_vldrepl_w' must be a constant integer}} + return res; +} + +v2i64 vldrepl_d(void *_1, int var) { + v2i64 res = __lsx_vldrepl_d(_1, -2056); // expected-error {{argument value -2056 is outside the valid range [-2048, 2040]}} + res |= __lsx_vldrepl_d(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2040]}} + res |= __lsx_vldrepl_d(_1, var); // expected-error {{argument to '__builtin_lsx_vldrepl_d' must be a constant integer}} + return res; +} + +v16i8 vrotri_b(v16i8 _1, int var) { + v16i8 res = __lsx_vrotri_b(_1, -1); // expected-error 
{{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __lsx_vrotri_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __lsx_vrotri_b(_1, var); // expected-error {{argument to '__builtin_lsx_vrotri_b' must be a constant integer}} + return res; +} + +v8i16 vrotri_h(v8i16 _1, int var) { + v8i16 res = __lsx_vrotri_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lsx_vrotri_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lsx_vrotri_h(_1, var); // expected-error {{argument to '__builtin_lsx_vrotri_h' must be a constant integer}} + return res; +} + +v4i32 vrotri_w(v4i32 _1, int var) { + v4i32 res = __lsx_vrotri_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vrotri_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vrotri_w(_1, var); // expected-error {{argument to '__builtin_lsx_vrotri_w' must be a constant integer}} + return res; +} + +v2i64 vrotri_d(v2i64 _1, int var) { + v2i64 res = __lsx_vrotri_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __lsx_vrotri_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __lsx_vrotri_d(_1, var); // expected-error {{argument to '__builtin_lsx_vrotri_d' must be a constant integer}} + return res; +} + +v16i8 vsrlni_b_h(v16i8 _1, v16i8 _2, int var) { + v16i8 res = __lsx_vsrlni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lsx_vsrlni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lsx_vsrlni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlni_b_h' must be a constant integer}} + return res; +} + +v8i16 vsrlni_h_w(v8i16 _1, v8i16 _2, int var) { + v8i16 res = __lsx_vsrlni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vsrlni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vsrlni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlni_h_w' must be a constant integer}} + return res; +} + +v4i32 vsrlni_w_d(v4i32 _1, v4i32 _2, int var) { + v4i32 res = __lsx_vsrlni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __lsx_vsrlni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __lsx_vsrlni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlni_w_d' must be a constant integer}} + return res; +} + +v2i64 vsrlni_d_q(v2i64 _1, v2i64 _2, int var) { + v2i64 res = __lsx_vsrlni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} + res |= __lsx_vsrlni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + res |= __lsx_vsrlni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlni_d_q' must be a constant integer}} + return res; +} + +v16i8 vsrlrni_b_h(v16i8 _1, v16i8 _2, int var) { + v16i8 res = __lsx_vsrlrni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lsx_vsrlrni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid 
range [0, 15]}} + res |= __lsx_vsrlrni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlrni_b_h' must be a constant integer}} + return res; +} + +v8i16 vsrlrni_h_w(v8i16 _1, v8i16 _2, int var) { + v8i16 res = __lsx_vsrlrni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vsrlrni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vsrlrni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlrni_h_w' must be a constant integer}} + return res; +} + +v4i32 vsrlrni_w_d(v4i32 _1, v4i32 _2, int var) { + v4i32 res = __lsx_vsrlrni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __lsx_vsrlrni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __lsx_vsrlrni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlrni_w_d' must be a constant integer}} + return res; +} + +v2i64 vsrlrni_d_q(v2i64 _1, v2i64 _2, int var) { + v2i64 res = __lsx_vsrlrni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} + res |= __lsx_vsrlrni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + res |= __lsx_vsrlrni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlrni_d_q' must be a constant integer}} + return res; +} + +v16i8 vssrlni_b_h(v16i8 _1, v16i8 _2, int var) { + v16i8 res = __lsx_vssrlni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lsx_vssrlni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lsx_vssrlni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_b_h' must be a constant integer}} + return res; +} + +v8i16 vssrlni_h_w(v8i16 _1, v8i16 _2, int var) { + v8i16 res = __lsx_vssrlni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vssrlni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vssrlni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_h_w' must be a constant integer}} + return res; +} + +v4i32 vssrlni_w_d(v4i32 _1, v4i32 _2, int var) { + v4i32 res = __lsx_vssrlni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __lsx_vssrlni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __lsx_vssrlni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_w_d' must be a constant integer}} + return res; +} + +v2i64 vssrlni_d_q(v2i64 _1, v2i64 _2, int var) { + v2i64 res = __lsx_vssrlni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} + res |= __lsx_vssrlni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + res |= __lsx_vssrlni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_d_q' must be a constant integer}} + return res; +} + +v16u8 vssrlni_bu_h(v16u8 _1, v16i8 _2, int var) { + v16u8 res = __lsx_vssrlni_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lsx_vssrlni_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range 
[0, 15]}} + res |= __lsx_vssrlni_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_bu_h' must be a constant integer}} + return res; +} + +v8u16 vssrlni_hu_w(v8u16 _1, v8i16 _2, int var) { + v8u16 res = __lsx_vssrlni_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vssrlni_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vssrlni_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_hu_w' must be a constant integer}} + return res; +} + +v4u32 vssrlni_wu_d(v4u32 _1, v4i32 _2, int var) { + v4u32 res = __lsx_vssrlni_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __lsx_vssrlni_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __lsx_vssrlni_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_wu_d' must be a constant integer}} + return res; +} + +v2u64 vssrlni_du_q(v2u64 _1, v2i64 _2, int var) { + v2u64 res = __lsx_vssrlni_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} + res |= __lsx_vssrlni_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + res |= __lsx_vssrlni_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_du_q' must be a constant integer}} + return res; +} + +v16i8 vssrlrni_b_h(v16i8 _1, v16i8 _2, int var) { + v16i8 res = __lsx_vssrlrni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lsx_vssrlrni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lsx_vssrlrni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_b_h' must be a constant integer}} + return res; +} + +v8i16 vssrlrni_h_w(v8i16 _1, v8i16 _2, int var) { + v8i16 res = __lsx_vssrlrni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vssrlrni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vssrlrni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_h_w' must be a constant integer}} + return res; +} + +v4i32 vssrlrni_w_d(v4i32 _1, v4i32 _2, int var) { + v4i32 res = __lsx_vssrlrni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __lsx_vssrlrni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __lsx_vssrlrni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_w_d' must be a constant integer}} + return res; +} + +v2i64 vssrlrni_d_q(v2i64 _1, v2i64 _2, int var) { + v2i64 res = __lsx_vssrlrni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} + res |= __lsx_vssrlrni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + res |= __lsx_vssrlrni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_d_q' must be a constant integer}} + return res; +} + +v16u8 vssrlrni_bu_h(v16u8 _1, v16i8 _2, int var) { + v16u8 res = __lsx_vssrlrni_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lsx_vssrlrni_bu_h(_1, _2, 16); // expected-error {{argument 
value 16 is outside the valid range [0, 15]}} + res |= __lsx_vssrlrni_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_bu_h' must be a constant integer}} + return res; +} + +v8u16 vssrlrni_hu_w(v8u16 _1, v8i16 _2, int var) { + v8u16 res = __lsx_vssrlrni_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vssrlrni_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vssrlrni_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_hu_w' must be a constant integer}} + return res; +} + +v4u32 vssrlrni_wu_d(v4u32 _1, v4i32 _2, int var) { + v4u32 res = __lsx_vssrlrni_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __lsx_vssrlrni_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __lsx_vssrlrni_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_wu_d' must be a constant integer}} + return res; +} + +v2u64 vssrlrni_du_q(v2u64 _1, v2i64 _2, int var) { + v2u64 res = __lsx_vssrlrni_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} + res |= __lsx_vssrlrni_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + res |= __lsx_vssrlrni_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_du_q' must be a constant integer}} + return res; +} + +v16i8 vsrani_b_h(v16i8 _1, v16i8 _2, int var) { + v16i8 res = __lsx_vsrani_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lsx_vsrani_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lsx_vsrani_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrani_b_h' must be a constant integer}} + return res; +} + +v8i16 vsrani_h_w(v8i16 _1, v8i16 _2, int var) { + v8i16 res = __lsx_vsrani_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vsrani_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vsrani_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrani_h_w' must be a constant integer}} + return res; +} + +v4i32 vsrani_w_d(v4i32 _1, v4i32 _2, int var) { + v4i32 res = __lsx_vsrani_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __lsx_vsrani_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __lsx_vsrani_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrani_w_d' must be a constant integer}} + return res; +} + +v2i64 vsrani_d_q(v2i64 _1, v2i64 _2, int var) { + v2i64 res = __lsx_vsrani_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} + res |= __lsx_vsrani_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + res |= __lsx_vsrani_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrani_d_q' must be a constant integer}} + return res; +} + +v16i8 vsrarni_b_h(v16i8 _1, v16i8 _2, int var) { + v16i8 res = __lsx_vsrarni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lsx_vsrarni_b_h(_1, _2, 16); // expected-error 
{{argument value 16 is outside the valid range [0, 15]}} + res |= __lsx_vsrarni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrarni_b_h' must be a constant integer}} + return res; +} + +v8i16 vsrarni_h_w(v8i16 _1, v8i16 _2, int var) { + v8i16 res = __lsx_vsrarni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vsrarni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vsrarni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrarni_h_w' must be a constant integer}} + return res; +} + +v4i32 vsrarni_w_d(v4i32 _1, v4i32 _2, int var) { + v4i32 res = __lsx_vsrarni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __lsx_vsrarni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __lsx_vsrarni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrarni_w_d' must be a constant integer}} + return res; +} + +v2i64 vsrarni_d_q(v2i64 _1, v2i64 _2, int var) { + v2i64 res = __lsx_vsrarni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} + res |= __lsx_vsrarni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + res |= __lsx_vsrarni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrarni_d_q' must be a constant integer}} + return res; +} + +v16i8 vssrani_b_h(v16i8 _1, v16i8 _2, int var) { + v16i8 res = __lsx_vssrani_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lsx_vssrani_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lsx_vssrani_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_b_h' must be a constant integer}} + return res; +} + +v8i16 vssrani_h_w(v8i16 _1, v8i16 _2, int var) { + v8i16 res = __lsx_vssrani_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vssrani_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vssrani_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_h_w' must be a constant integer}} + return res; +} + +v4i32 vssrani_w_d(v4i32 _1, v4i32 _2, int var) { + v4i32 res = __lsx_vssrani_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __lsx_vssrani_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __lsx_vssrani_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_w_d' must be a constant integer}} + return res; +} + +v2i64 vssrani_d_q(v2i64 _1, v2i64 _2, int var) { + v2i64 res = __lsx_vssrani_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} + res |= __lsx_vssrani_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + res |= __lsx_vssrani_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_d_q' must be a constant integer}} + return res; +} + +v16u8 vssrani_bu_h(v16u8 _1, v16i8 _2, int var) { + v16u8 res = __lsx_vssrani_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lsx_vssrani_bu_h(_1, _2, 16); // expected-error 
{{argument value 16 is outside the valid range [0, 15]}} + res |= __lsx_vssrani_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_bu_h' must be a constant integer}} + return res; +} + +v8u16 vssrani_hu_w(v8u16 _1, v8i16 _2, int var) { + v8u16 res = __lsx_vssrani_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vssrani_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vssrani_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_hu_w' must be a constant integer}} + return res; +} + +v4u32 vssrani_wu_d(v4u32 _1, v4i32 _2, int var) { + v4u32 res = __lsx_vssrani_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __lsx_vssrani_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __lsx_vssrani_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_wu_d' must be a constant integer}} + return res; +} + +v2u64 vssrani_du_q(v2u64 _1, v2i64 _2, int var) { + v2u64 res = __lsx_vssrani_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} + res |= __lsx_vssrani_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + res |= __lsx_vssrani_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_du_q' must be a constant integer}} + return res; +} + +v16i8 vssrarni_b_h(v16i8 _1, v16i8 _2, int var) { + v16i8 res = __lsx_vssrarni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __lsx_vssrarni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lsx_vssrarni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_b_h' must be a constant integer}} + return res; +} + +v8i16 vssrarni_h_w(v8i16 _1, v8i16 _2, int var) { + v8i16 res = __lsx_vssrarni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vssrarni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vssrarni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_h_w' must be a constant integer}} + return res; +} + +v4i32 vssrarni_w_d(v4i32 _1, v4i32 _2, int var) { + v4i32 res = __lsx_vssrarni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __lsx_vssrarni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __lsx_vssrarni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_w_d' must be a constant integer}} + return res; +} + +v2i64 vssrarni_d_q(v2i64 _1, v2i64 _2, int var) { + v2i64 res = __lsx_vssrarni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} + res |= __lsx_vssrarni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + res |= __lsx_vssrarni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_d_q' must be a constant integer}} + return res; +} + +v16u8 vssrarni_bu_h(v16u8 _1, v16i8 _2, int var) { + v16u8 res = __lsx_vssrarni_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= 
__lsx_vssrarni_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __lsx_vssrarni_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_bu_h' must be a constant integer}} + return res; +} + +v8u16 vssrarni_hu_w(v8u16 _1, v8i16 _2, int var) { + v8u16 res = __lsx_vssrarni_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __lsx_vssrarni_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __lsx_vssrarni_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_hu_w' must be a constant integer}} + return res; +} + +v4u32 vssrarni_wu_d(v4u32 _1, v4i32 _2, int var) { + v4u32 res = __lsx_vssrarni_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __lsx_vssrarni_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __lsx_vssrarni_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_wu_d' must be a constant integer}} + return res; +} + +v2u64 vssrarni_du_q(v2u64 _1, v2i64 _2, int var) { + v2u64 res = __lsx_vssrarni_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} + res |= __lsx_vssrarni_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + res |= __lsx_vssrarni_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_du_q' must be a constant integer}} + return res; +} + +v4i32 vpermi_w(v4i32 _1, v4i32 _2, int var) { + v4i32 res = __lsx_vpermi_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __lsx_vpermi_w(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __lsx_vpermi_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vpermi_w' must be a constant integer}} + return res; +} + +v16i8 vld(void *_1, int var) { + v16i8 res = __lsx_vld(_1, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} + res |= __lsx_vld(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2047]}} + res |= __lsx_vld(_1, var); // expected-error {{argument to '__builtin_lsx_vld' must be a constant integer}} + return res; +} + +void vst(v16i8 _1, void *_2, int var) { + __lsx_vst(_1, _2, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} + __lsx_vst(_1, _2, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2047]}} + __lsx_vst(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vst' must be a constant integer}} +} + +v2i64 vldi(int var) { + v2i64 res = __lsx_vldi(-4097); // expected-error {{argument value -4097 is outside the valid range [-4096, 4095]}} + res |= __lsx_vldi(4096); // expected-error {{argument value 4096 is outside the valid range [-4096, 4095]}} + res |= __lsx_vldi(var); // expected-error {{argument to '__builtin_lsx_vldi' must be a constant integer}} + return res; +} + +v16i8 vrepli_b(int var) { + v16i8 res = __lsx_vrepli_b(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} + res |= __lsx_vrepli_b(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}} + res |= __lsx_vrepli_b(var); // expected-error {{argument to '__builtin_lsx_vrepli_b' must be a 
constant integer}}
+  return res;
+}
+
+v2i64 vrepli_d(int var) {
+  v2i64 res = __lsx_vrepli_d(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}}
+  res |= __lsx_vrepli_d(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}}
+  res |= __lsx_vrepli_d(var); // expected-error {{argument to '__builtin_lsx_vrepli_d' must be a constant integer}}
+  return res;
+}
+
+v8i16 vrepli_h(int var) {
+  v8i16 res = __lsx_vrepli_h(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}}
+  res |= __lsx_vrepli_h(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}}
+  res |= __lsx_vrepli_h(var); // expected-error {{argument to '__builtin_lsx_vrepli_h' must be a constant integer}}
+  return res;
+}
+
+v4i32 vrepli_w(int var) {
+  v4i32 res = __lsx_vrepli_w(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}}
+  res |= __lsx_vrepli_w(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}}
+  res |= __lsx_vrepli_w(var); // expected-error {{argument to '__builtin_lsx_vrepli_w' must be a constant integer}}
+  return res;
+}
diff --git a/clang/test/CodeGen/LoongArch/lsx/builtin-alias.c b/clang/test/CodeGen/LoongArch/lsx/builtin-alias.c
new file mode 100644
index 0000000000000000000000000000000000000000..7a84e0ae24f950b8d898cac908d06e15474f8db8
--- /dev/null
+++ b/clang/test/CodeGen/LoongArch/lsx/builtin-alias.c
@@ -0,0 +1,6359 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -triple loongarch64 -target-feature +lsx -O2 -emit-llvm %s -o - | FileCheck %s
+
+#include <lsxintrin.h>
+
+// CHECK-LABEL: @vsll_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsll.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v16i8 vsll_b(v16i8 _1, v16i8 _2) { return __lsx_vsll_b(_1, _2); }
+// CHECK-LABEL: @vsll_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsll.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v8i16 vsll_h(v8i16 _1, v8i16 _2) { return __lsx_vsll_h(_1, _2); }
+// CHECK-LABEL: @vsll_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsll.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v4i32 vsll_w(v4i32 _1, v4i32 _2) { return __lsx_vsll_w(_1, _2); }
+// CHECK-LABEL: @vsll_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsll.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vsll_d(v2i64 _1, v2i64 _2) { return __lsx_vsll_d(_1, _2); } +// CHECK-LABEL: @vslli_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslli.b(<16 x i8> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v16i8 vslli_b(v16i8 _1) { return __lsx_vslli_b(_1, 1); } +// CHECK-LABEL: @vslli_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslli.h(<8 x i16> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v8i16 vslli_h(v8i16 _1) { return __lsx_vslli_h(_1, 1); } +// CHECK-LABEL: @vslli_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslli.w(<4 x i32> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4i32 vslli_w(v4i32 _1) { return __lsx_vslli_w(_1, 1); } +// CHECK-LABEL: @vslli_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslli.d(<2 x i64> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2i64 vslli_d(v2i64 _1) { return __lsx_vslli_d(_1, 1); } +// CHECK-LABEL: @vsra_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsra.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vsra_b(v16i8 _1, v16i8 _2) { return __lsx_vsra_b(_1, _2); } +// CHECK-LABEL: @vsra_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsra.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vsra_h(v8i16 _1, v8i16 _2) { return __lsx_vsra_h(_1, _2); } +// CHECK-LABEL: @vsra_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsra.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vsra_w(v4i32 _1, v4i32 _2) { return __lsx_vsra_w(_1, _2); } +// CHECK-LABEL: @vsra_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsra.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 
[[TMP3]] +// +v2i64 vsra_d(v2i64 _1, v2i64 _2) { return __lsx_vsra_d(_1, _2); } +// CHECK-LABEL: @vsrai_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrai.b(<16 x i8> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v16i8 vsrai_b(v16i8 _1) { return __lsx_vsrai_b(_1, 1); } +// CHECK-LABEL: @vsrai_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrai.h(<8 x i16> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v8i16 vsrai_h(v8i16 _1) { return __lsx_vsrai_h(_1, 1); } +// CHECK-LABEL: @vsrai_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrai.w(<4 x i32> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4i32 vsrai_w(v4i32 _1) { return __lsx_vsrai_w(_1, 1); } +// CHECK-LABEL: @vsrai_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrai.d(<2 x i64> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2i64 vsrai_d(v2i64 _1) { return __lsx_vsrai_d(_1, 1); } +// CHECK-LABEL: @vsrar_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrar.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vsrar_b(v16i8 _1, v16i8 _2) { return __lsx_vsrar_b(_1, _2); } +// CHECK-LABEL: @vsrar_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrar.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vsrar_h(v8i16 _1, v8i16 _2) { return __lsx_vsrar_h(_1, _2); } +// CHECK-LABEL: @vsrar_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrar.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vsrar_w(v4i32 _1, v4i32 _2) { return __lsx_vsrar_w(_1, _2); } +// CHECK-LABEL: @vsrar_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrar.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// 
+v2i64 vsrar_d(v2i64 _1, v2i64 _2) { return __lsx_vsrar_d(_1, _2); } +// CHECK-LABEL: @vsrari_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrari.b(<16 x i8> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v16i8 vsrari_b(v16i8 _1) { return __lsx_vsrari_b(_1, 1); } +// CHECK-LABEL: @vsrari_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrari.h(<8 x i16> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v8i16 vsrari_h(v8i16 _1) { return __lsx_vsrari_h(_1, 1); } +// CHECK-LABEL: @vsrari_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrari.w(<4 x i32> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4i32 vsrari_w(v4i32 _1) { return __lsx_vsrari_w(_1, 1); } +// CHECK-LABEL: @vsrari_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrari.d(<2 x i64> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2i64 vsrari_d(v2i64 _1) { return __lsx_vsrari_d(_1, 1); } +// CHECK-LABEL: @vsrl_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrl.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vsrl_b(v16i8 _1, v16i8 _2) { return __lsx_vsrl_b(_1, _2); } +// CHECK-LABEL: @vsrl_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrl.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vsrl_h(v8i16 _1, v8i16 _2) { return __lsx_vsrl_h(_1, _2); } +// CHECK-LABEL: @vsrl_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrl.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vsrl_w(v4i32 _1, v4i32 _2) { return __lsx_vsrl_w(_1, _2); } +// CHECK-LABEL: @vsrl_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrl.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 
vsrl_d(v2i64 _1, v2i64 _2) { return __lsx_vsrl_d(_1, _2); } +// CHECK-LABEL: @vsrli_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrli.b(<16 x i8> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v16i8 vsrli_b(v16i8 _1) { return __lsx_vsrli_b(_1, 1); } +// CHECK-LABEL: @vsrli_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrli.h(<8 x i16> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v8i16 vsrli_h(v8i16 _1) { return __lsx_vsrli_h(_1, 1); } +// CHECK-LABEL: @vsrli_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrli.w(<4 x i32> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4i32 vsrli_w(v4i32 _1) { return __lsx_vsrli_w(_1, 1); } +// CHECK-LABEL: @vsrli_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrli.d(<2 x i64> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2i64 vsrli_d(v2i64 _1) { return __lsx_vsrli_d(_1, 1); } +// CHECK-LABEL: @vsrlr_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlr.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vsrlr_b(v16i8 _1, v16i8 _2) { return __lsx_vsrlr_b(_1, _2); } +// CHECK-LABEL: @vsrlr_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlr.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vsrlr_h(v8i16 _1, v8i16 _2) { return __lsx_vsrlr_h(_1, _2); } +// CHECK-LABEL: @vsrlr_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlr.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vsrlr_w(v4i32 _1, v4i32 _2) { return __lsx_vsrlr_w(_1, _2); } +// CHECK-LABEL: @vsrlr_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlr.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vsrlr_d(v2i64 
_1, v2i64 _2) { return __lsx_vsrlr_d(_1, _2); } +// CHECK-LABEL: @vsrlri_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlri.b(<16 x i8> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v16i8 vsrlri_b(v16i8 _1) { return __lsx_vsrlri_b(_1, 1); } +// CHECK-LABEL: @vsrlri_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlri.h(<8 x i16> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v8i16 vsrlri_h(v8i16 _1) { return __lsx_vsrlri_h(_1, 1); } +// CHECK-LABEL: @vsrlri_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlri.w(<4 x i32> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4i32 vsrlri_w(v4i32 _1) { return __lsx_vsrlri_w(_1, 1); } +// CHECK-LABEL: @vsrlri_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlri.d(<2 x i64> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2i64 vsrlri_d(v2i64 _1) { return __lsx_vsrlri_d(_1, 1); } +// CHECK-LABEL: @vbitclr_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitclr.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16u8 vbitclr_b(v16u8 _1, v16u8 _2) { return __lsx_vbitclr_b(_1, _2); } +// CHECK-LABEL: @vbitclr_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitclr.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8u16 vbitclr_h(v8u16 _1, v8u16 _2) { return __lsx_vbitclr_h(_1, _2); } +// CHECK-LABEL: @vbitclr_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitclr.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4u32 vbitclr_w(v4u32 _1, v4u32 _2) { return __lsx_vbitclr_w(_1, _2); } +// CHECK-LABEL: @vbitclr_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitclr.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 
[[TMP3]] +// +v2u64 vbitclr_d(v2u64 _1, v2u64 _2) { return __lsx_vbitclr_d(_1, _2); } +// CHECK-LABEL: @vbitclri_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitclri.b(<16 x i8> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v16u8 vbitclri_b(v16u8 _1) { return __lsx_vbitclri_b(_1, 1); } +// CHECK-LABEL: @vbitclri_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitclri.h(<8 x i16> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v8u16 vbitclri_h(v8u16 _1) { return __lsx_vbitclri_h(_1, 1); } +// CHECK-LABEL: @vbitclri_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitclri.w(<4 x i32> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4u32 vbitclri_w(v4u32 _1) { return __lsx_vbitclri_w(_1, 1); } +// CHECK-LABEL: @vbitclri_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitclri.d(<2 x i64> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2u64 vbitclri_d(v2u64 _1) { return __lsx_vbitclri_d(_1, 1); } +// CHECK-LABEL: @vbitset_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitset.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16u8 vbitset_b(v16u8 _1, v16u8 _2) { return __lsx_vbitset_b(_1, _2); } +// CHECK-LABEL: @vbitset_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitset.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8u16 vbitset_h(v8u16 _1, v8u16 _2) { return __lsx_vbitset_h(_1, _2); } +// CHECK-LABEL: @vbitset_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitset.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4u32 vbitset_w(v4u32 _1, v4u32 _2) { return __lsx_vbitset_w(_1, _2); } +// CHECK-LABEL: @vbitset_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitset.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: 
[[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2u64 vbitset_d(v2u64 _1, v2u64 _2) { return __lsx_vbitset_d(_1, _2); } +// CHECK-LABEL: @vbitseti_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitseti.b(<16 x i8> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v16u8 vbitseti_b(v16u8 _1) { return __lsx_vbitseti_b(_1, 1); } +// CHECK-LABEL: @vbitseti_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitseti.h(<8 x i16> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v8u16 vbitseti_h(v8u16 _1) { return __lsx_vbitseti_h(_1, 1); } +// CHECK-LABEL: @vbitseti_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitseti.w(<4 x i32> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4u32 vbitseti_w(v4u32 _1) { return __lsx_vbitseti_w(_1, 1); } +// CHECK-LABEL: @vbitseti_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitseti.d(<2 x i64> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2u64 vbitseti_d(v2u64 _1) { return __lsx_vbitseti_d(_1, 1); } +// CHECK-LABEL: @vbitrev_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitrev.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16u8 vbitrev_b(v16u8 _1, v16u8 _2) { return __lsx_vbitrev_b(_1, _2); } +// CHECK-LABEL: @vbitrev_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitrev.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8u16 vbitrev_h(v8u16 _1, v8u16 _2) { return __lsx_vbitrev_h(_1, _2); } +// CHECK-LABEL: @vbitrev_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitrev.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4u32 vbitrev_w(v4u32 _1, v4u32 _2) { return __lsx_vbitrev_w(_1, _2); } +// CHECK-LABEL: @vbitrev_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> 
@llvm.loongarch.lsx.vbitrev.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2u64 vbitrev_d(v2u64 _1, v2u64 _2) { return __lsx_vbitrev_d(_1, _2); } +// CHECK-LABEL: @vbitrevi_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitrevi.b(<16 x i8> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v16u8 vbitrevi_b(v16u8 _1) { return __lsx_vbitrevi_b(_1, 1); } +// CHECK-LABEL: @vbitrevi_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitrevi.h(<8 x i16> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v8u16 vbitrevi_h(v8u16 _1) { return __lsx_vbitrevi_h(_1, 1); } +// CHECK-LABEL: @vbitrevi_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitrevi.w(<4 x i32> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4u32 vbitrevi_w(v4u32 _1) { return __lsx_vbitrevi_w(_1, 1); } +// CHECK-LABEL: @vbitrevi_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitrevi.d(<2 x i64> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2u64 vbitrevi_d(v2u64 _1) { return __lsx_vbitrevi_d(_1, 1); } +// CHECK-LABEL: @vadd_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vadd.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vadd_b(v16i8 _1, v16i8 _2) { return __lsx_vadd_b(_1, _2); } +// CHECK-LABEL: @vadd_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vadd.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vadd_h(v8i16 _1, v8i16 _2) { return __lsx_vadd_h(_1, _2); } +// CHECK-LABEL: @vadd_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vadd.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vadd_w(v4i32 _1, v4i32 _2) { return __lsx_vadd_w(_1, _2); } +// CHECK-LABEL: @vadd_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: 
[[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vadd.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vadd_d(v2i64 _1, v2i64 _2) { return __lsx_vadd_d(_1, _2); } +// CHECK-LABEL: @vaddi_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vaddi.bu(<16 x i8> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v16i8 vaddi_bu(v16i8 _1) { return __lsx_vaddi_bu(_1, 1); } +// CHECK-LABEL: @vaddi_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddi.hu(<8 x i16> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v8i16 vaddi_hu(v8i16 _1) { return __lsx_vaddi_hu(_1, 1); } +// CHECK-LABEL: @vaddi_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddi.wu(<4 x i32> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4i32 vaddi_wu(v4i32 _1) { return __lsx_vaddi_wu(_1, 1); } +// CHECK-LABEL: @vaddi_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddi.du(<2 x i64> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2i64 vaddi_du(v2i64 _1) { return __lsx_vaddi_du(_1, 1); } +// CHECK-LABEL: @vsub_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsub.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vsub_b(v16i8 _1, v16i8 _2) { return __lsx_vsub_b(_1, _2); } +// CHECK-LABEL: @vsub_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsub.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vsub_h(v8i16 _1, v8i16 _2) { return __lsx_vsub_h(_1, _2); } +// CHECK-LABEL: @vsub_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsub.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vsub_w(v4i32 _1, v4i32 _2) { return __lsx_vsub_w(_1, _2); } +// CHECK-LABEL: @vsub_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = 
tail call <2 x i64> @llvm.loongarch.lsx.vsub.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vsub_d(v2i64 _1, v2i64 _2) { return __lsx_vsub_d(_1, _2); } +// CHECK-LABEL: @vsubi_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsubi.bu(<16 x i8> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v16i8 vsubi_bu(v16i8 _1) { return __lsx_vsubi_bu(_1, 1); } +// CHECK-LABEL: @vsubi_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubi.hu(<8 x i16> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v8i16 vsubi_hu(v8i16 _1) { return __lsx_vsubi_hu(_1, 1); } +// CHECK-LABEL: @vsubi_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubi.wu(<4 x i32> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4i32 vsubi_wu(v4i32 _1) { return __lsx_vsubi_wu(_1, 1); } +// CHECK-LABEL: @vsubi_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubi.du(<2 x i64> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2i64 vsubi_du(v2i64 _1) { return __lsx_vsubi_du(_1, 1); } +// CHECK-LABEL: @vmax_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmax.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vmax_b(v16i8 _1, v16i8 _2) { return __lsx_vmax_b(_1, _2); } +// CHECK-LABEL: @vmax_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmax.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vmax_h(v8i16 _1, v8i16 _2) { return __lsx_vmax_h(_1, _2); } +// CHECK-LABEL: @vmax_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmax.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vmax_w(v4i32 _1, v4i32 _2) { return __lsx_vmax_w(_1, _2); } +// CHECK-LABEL: @vmax_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x 
i64> @llvm.loongarch.lsx.vmax.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vmax_d(v2i64 _1, v2i64 _2) { return __lsx_vmax_d(_1, _2); } +// CHECK-LABEL: @vmaxi_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmaxi.b(<16 x i8> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v16i8 vmaxi_b(v16i8 _1) { return __lsx_vmaxi_b(_1, 1); } +// CHECK-LABEL: @vmaxi_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaxi.h(<8 x i16> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v8i16 vmaxi_h(v8i16 _1) { return __lsx_vmaxi_h(_1, 1); } +// CHECK-LABEL: @vmaxi_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaxi.w(<4 x i32> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4i32 vmaxi_w(v4i32 _1) { return __lsx_vmaxi_w(_1, 1); } +// CHECK-LABEL: @vmaxi_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaxi.d(<2 x i64> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2i64 vmaxi_d(v2i64 _1) { return __lsx_vmaxi_d(_1, 1); } +// CHECK-LABEL: @vmax_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmax.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16u8 vmax_bu(v16u8 _1, v16u8 _2) { return __lsx_vmax_bu(_1, _2); } +// CHECK-LABEL: @vmax_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmax.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8u16 vmax_hu(v8u16 _1, v8u16 _2) { return __lsx_vmax_hu(_1, _2); } +// CHECK-LABEL: @vmax_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmax.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4u32 vmax_wu(v4u32 _1, v4u32 _2) { return __lsx_vmax_wu(_1, _2); } +// CHECK-LABEL: @vmax_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> 
@llvm.loongarch.lsx.vmax.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2u64 vmax_du(v2u64 _1, v2u64 _2) { return __lsx_vmax_du(_1, _2); } +// CHECK-LABEL: @vmaxi_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmaxi.bu(<16 x i8> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v16u8 vmaxi_bu(v16u8 _1) { return __lsx_vmaxi_bu(_1, 1); } +// CHECK-LABEL: @vmaxi_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaxi.hu(<8 x i16> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v8u16 vmaxi_hu(v8u16 _1) { return __lsx_vmaxi_hu(_1, 1); } +// CHECK-LABEL: @vmaxi_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaxi.wu(<4 x i32> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4u32 vmaxi_wu(v4u32 _1) { return __lsx_vmaxi_wu(_1, 1); } +// CHECK-LABEL: @vmaxi_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaxi.du(<2 x i64> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2u64 vmaxi_du(v2u64 _1) { return __lsx_vmaxi_du(_1, 1); } +// CHECK-LABEL: @vmin_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmin.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vmin_b(v16i8 _1, v16i8 _2) { return __lsx_vmin_b(_1, _2); } +// CHECK-LABEL: @vmin_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmin.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vmin_h(v8i16 _1, v8i16 _2) { return __lsx_vmin_h(_1, _2); } +// CHECK-LABEL: @vmin_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmin.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vmin_w(v4i32 _1, v4i32 _2) { return __lsx_vmin_w(_1, _2); } +// CHECK-LABEL: @vmin_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> 
@llvm.loongarch.lsx.vmin.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vmin_d(v2i64 _1, v2i64 _2) { return __lsx_vmin_d(_1, _2); } +// CHECK-LABEL: @vmini_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmini.b(<16 x i8> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v16i8 vmini_b(v16i8 _1) { return __lsx_vmini_b(_1, 1); } +// CHECK-LABEL: @vmini_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmini.h(<8 x i16> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v8i16 vmini_h(v8i16 _1) { return __lsx_vmini_h(_1, 1); } +// CHECK-LABEL: @vmini_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmini.w(<4 x i32> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4i32 vmini_w(v4i32 _1) { return __lsx_vmini_w(_1, 1); } +// CHECK-LABEL: @vmini_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmini.d(<2 x i64> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2i64 vmini_d(v2i64 _1) { return __lsx_vmini_d(_1, 1); } +// CHECK-LABEL: @vmin_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmin.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16u8 vmin_bu(v16u8 _1, v16u8 _2) { return __lsx_vmin_bu(_1, _2); } +// CHECK-LABEL: @vmin_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmin.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8u16 vmin_hu(v8u16 _1, v8u16 _2) { return __lsx_vmin_hu(_1, _2); } +// CHECK-LABEL: @vmin_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmin.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4u32 vmin_wu(v4u32 _1, v4u32 _2) { return __lsx_vmin_wu(_1, _2); } +// CHECK-LABEL: @vmin_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> 
@llvm.loongarch.lsx.vmin.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2u64 vmin_du(v2u64 _1, v2u64 _2) { return __lsx_vmin_du(_1, _2); } +// CHECK-LABEL: @vmini_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmini.bu(<16 x i8> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v16u8 vmini_bu(v16u8 _1) { return __lsx_vmini_bu(_1, 1); } +// CHECK-LABEL: @vmini_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmini.hu(<8 x i16> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v8u16 vmini_hu(v8u16 _1) { return __lsx_vmini_hu(_1, 1); } +// CHECK-LABEL: @vmini_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmini.wu(<4 x i32> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4u32 vmini_wu(v4u32 _1) { return __lsx_vmini_wu(_1, 1); } +// CHECK-LABEL: @vmini_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmini.du(<2 x i64> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2u64 vmini_du(v2u64 _1) { return __lsx_vmini_du(_1, 1); } +// CHECK-LABEL: @vseq_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vseq.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vseq_b(v16i8 _1, v16i8 _2) { return __lsx_vseq_b(_1, _2); } +// CHECK-LABEL: @vseq_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vseq.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vseq_h(v8i16 _1, v8i16 _2) { return __lsx_vseq_h(_1, _2); } +// CHECK-LABEL: @vseq_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vseq.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vseq_w(v4i32 _1, v4i32 _2) { return __lsx_vseq_w(_1, _2); } +// CHECK-LABEL: @vseq_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> 
@llvm.loongarch.lsx.vseq.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vseq_d(v2i64 _1, v2i64 _2) { return __lsx_vseq_d(_1, _2); }
+// CHECK-LABEL: @vseqi_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vseqi.b(<16 x i8> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v16i8 vseqi_b(v16i8 _1) { return __lsx_vseqi_b(_1, 1); }
+// CHECK-LABEL: @vseqi_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vseqi.h(<8 x i16> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v8i16 vseqi_h(v8i16 _1) { return __lsx_vseqi_h(_1, 1); }
+// CHECK-LABEL: @vseqi_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vseqi.w(<4 x i32> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4i32 vseqi_w(v4i32 _1) { return __lsx_vseqi_w(_1, 1); }
+// CHECK-LABEL: @vseqi_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vseqi.d(<2 x i64> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2i64 vseqi_d(v2i64 _1) { return __lsx_vseqi_d(_1, 1); }
+// CHECK-LABEL: @vslt_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslt.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vslt_b(v16i8 _1, v16i8 _2) { return __lsx_vslt_b(_1, _2); }
+// CHECK-LABEL: @vslt_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslt.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vslt_h(v8i16 _1, v8i16 _2) { return __lsx_vslt_h(_1, _2); }
+// CHECK-LABEL: @vslt_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslt.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vslt_w(v4i32 _1, v4i32 _2) { return __lsx_vslt_w(_1, _2); }
+// CHECK-LABEL: @vslt_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslt.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vslt_d(v2i64 _1, v2i64 _2) { return __lsx_vslt_d(_1, _2); }
+// CHECK-LABEL: @vslti_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslti.b(<16 x i8> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v16i8 vslti_b(v16i8 _1) { return __lsx_vslti_b(_1, 1); }
+// CHECK-LABEL: @vslti_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslti.h(<8 x i16> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v8i16 vslti_h(v8i16 _1) { return __lsx_vslti_h(_1, 1); }
+// CHECK-LABEL: @vslti_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslti.w(<4 x i32> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4i32 vslti_w(v4i32 _1) { return __lsx_vslti_w(_1, 1); }
+// CHECK-LABEL: @vslti_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslti.d(<2 x i64> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2i64 vslti_d(v2i64 _1) { return __lsx_vslti_d(_1, 1); }
+// CHECK-LABEL: @vslt_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslt.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vslt_bu(v16u8 _1, v16u8 _2) { return __lsx_vslt_bu(_1, _2); }
+// CHECK-LABEL: @vslt_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslt.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vslt_hu(v8u16 _1, v8u16 _2) { return __lsx_vslt_hu(_1, _2); }
+// CHECK-LABEL: @vslt_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslt.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vslt_wu(v4u32 _1, v4u32 _2) { return __lsx_vslt_wu(_1, _2); }
+// CHECK-LABEL: @vslt_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslt.du(<2 x i64> [[TMP0]],
<2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vslt_du(v2u64 _1, v2u64 _2) { return __lsx_vslt_du(_1, _2); } +// CHECK-LABEL: @vslti_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslti.bu(<16 x i8> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v16i8 vslti_bu(v16u8 _1) { return __lsx_vslti_bu(_1, 1); } +// CHECK-LABEL: @vslti_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslti.hu(<8 x i16> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v8i16 vslti_hu(v8u16 _1) { return __lsx_vslti_hu(_1, 1); } +// CHECK-LABEL: @vslti_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslti.wu(<4 x i32> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4i32 vslti_wu(v4u32 _1) { return __lsx_vslti_wu(_1, 1); } +// CHECK-LABEL: @vslti_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslti.du(<2 x i64> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2i64 vslti_du(v2u64 _1) { return __lsx_vslti_du(_1, 1); } +// CHECK-LABEL: @vsle_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsle.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vsle_b(v16i8 _1, v16i8 _2) { return __lsx_vsle_b(_1, _2); } +// CHECK-LABEL: @vsle_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsle.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vsle_h(v8i16 _1, v8i16 _2) { return __lsx_vsle_h(_1, _2); } +// CHECK-LABEL: @vsle_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsle.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vsle_w(v4i32 _1, v4i32 _2) { return __lsx_vsle_w(_1, _2); } +// CHECK-LABEL: @vsle_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsle.d(<2 x i64> [[TMP0]], <2 x i64> 
[[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vsle_d(v2i64 _1, v2i64 _2) { return __lsx_vsle_d(_1, _2); } +// CHECK-LABEL: @vslei_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslei.b(<16 x i8> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v16i8 vslei_b(v16i8 _1) { return __lsx_vslei_b(_1, 1); } +// CHECK-LABEL: @vslei_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslei.h(<8 x i16> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v8i16 vslei_h(v8i16 _1) { return __lsx_vslei_h(_1, 1); } +// CHECK-LABEL: @vslei_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslei.w(<4 x i32> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4i32 vslei_w(v4i32 _1) { return __lsx_vslei_w(_1, 1); } +// CHECK-LABEL: @vslei_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslei.d(<2 x i64> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2i64 vslei_d(v2i64 _1) { return __lsx_vslei_d(_1, 1); } +// CHECK-LABEL: @vsle_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsle.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vsle_bu(v16u8 _1, v16u8 _2) { return __lsx_vsle_bu(_1, _2); } +// CHECK-LABEL: @vsle_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsle.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vsle_hu(v8u16 _1, v8u16 _2) { return __lsx_vsle_hu(_1, _2); } +// CHECK-LABEL: @vsle_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsle.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vsle_wu(v4u32 _1, v4u32 _2) { return __lsx_vsle_wu(_1, _2); } +// CHECK-LABEL: @vsle_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsle.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// 
CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vsle_du(v2u64 _1, v2u64 _2) { return __lsx_vsle_du(_1, _2); } +// CHECK-LABEL: @vslei_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslei.bu(<16 x i8> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v16i8 vslei_bu(v16u8 _1) { return __lsx_vslei_bu(_1, 1); } +// CHECK-LABEL: @vslei_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslei.hu(<8 x i16> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v8i16 vslei_hu(v8u16 _1) { return __lsx_vslei_hu(_1, 1); } +// CHECK-LABEL: @vslei_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslei.wu(<4 x i32> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4i32 vslei_wu(v4u32 _1) { return __lsx_vslei_wu(_1, 1); } +// CHECK-LABEL: @vslei_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslei.du(<2 x i64> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2i64 vslei_du(v2u64 _1) { return __lsx_vslei_du(_1, 1); } +// CHECK-LABEL: @vsat_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsat.b(<16 x i8> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v16i8 vsat_b(v16i8 _1) { return __lsx_vsat_b(_1, 1); } +// CHECK-LABEL: @vsat_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsat.h(<8 x i16> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v8i16 vsat_h(v8i16 _1) { return __lsx_vsat_h(_1, 1); } +// CHECK-LABEL: @vsat_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsat.w(<4 x i32> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4i32 vsat_w(v4i32 _1) { return __lsx_vsat_w(_1, 1); } +// CHECK-LABEL: @vsat_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsat.d(<2 x i64> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2i64 vsat_d(v2i64 _1) { return __lsx_vsat_d(_1, 1); } +// CHECK-LABEL: @vsat_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsat.bu(<16 x i8> [[TMP0]], i32 1) +// 
CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v16u8 vsat_bu(v16u8 _1) { return __lsx_vsat_bu(_1, 1); } +// CHECK-LABEL: @vsat_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsat.hu(<8 x i16> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v8u16 vsat_hu(v8u16 _1) { return __lsx_vsat_hu(_1, 1); } +// CHECK-LABEL: @vsat_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsat.wu(<4 x i32> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4u32 vsat_wu(v4u32 _1) { return __lsx_vsat_wu(_1, 1); } +// CHECK-LABEL: @vsat_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsat.du(<2 x i64> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2u64 vsat_du(v2u64 _1) { return __lsx_vsat_du(_1, 1); } +// CHECK-LABEL: @vadda_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vadda.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vadda_b(v16i8 _1, v16i8 _2) { return __lsx_vadda_b(_1, _2); } +// CHECK-LABEL: @vadda_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vadda.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vadda_h(v8i16 _1, v8i16 _2) { return __lsx_vadda_h(_1, _2); } +// CHECK-LABEL: @vadda_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vadda.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vadda_w(v4i32 _1, v4i32 _2) { return __lsx_vadda_w(_1, _2); } +// CHECK-LABEL: @vadda_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vadda.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vadda_d(v2i64 _1, v2i64 _2) { return __lsx_vadda_d(_1, _2); } +// CHECK-LABEL: @vsadd_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x 
i8> @llvm.loongarch.lsx.vsadd.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vsadd_b(v16i8 _1, v16i8 _2) { return __lsx_vsadd_b(_1, _2); } +// CHECK-LABEL: @vsadd_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsadd.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vsadd_h(v8i16 _1, v8i16 _2) { return __lsx_vsadd_h(_1, _2); } +// CHECK-LABEL: @vsadd_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsadd.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vsadd_w(v4i32 _1, v4i32 _2) { return __lsx_vsadd_w(_1, _2); } +// CHECK-LABEL: @vsadd_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsadd.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vsadd_d(v2i64 _1, v2i64 _2) { return __lsx_vsadd_d(_1, _2); } +// CHECK-LABEL: @vsadd_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsadd.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16u8 vsadd_bu(v16u8 _1, v16u8 _2) { return __lsx_vsadd_bu(_1, _2); } +// CHECK-LABEL: @vsadd_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsadd.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8u16 vsadd_hu(v8u16 _1, v8u16 _2) { return __lsx_vsadd_hu(_1, _2); } +// CHECK-LABEL: @vsadd_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsadd.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4u32 vsadd_wu(v4u32 _1, v4u32 _2) { return __lsx_vsadd_wu(_1, _2); } +// CHECK-LABEL: @vsadd_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsadd.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: 
[[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2u64 vsadd_du(v2u64 _1, v2u64 _2) { return __lsx_vsadd_du(_1, _2); } +// CHECK-LABEL: @vavg_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavg.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vavg_b(v16i8 _1, v16i8 _2) { return __lsx_vavg_b(_1, _2); } +// CHECK-LABEL: @vavg_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavg.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vavg_h(v8i16 _1, v8i16 _2) { return __lsx_vavg_h(_1, _2); } +// CHECK-LABEL: @vavg_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavg.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vavg_w(v4i32 _1, v4i32 _2) { return __lsx_vavg_w(_1, _2); } +// CHECK-LABEL: @vavg_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavg.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vavg_d(v2i64 _1, v2i64 _2) { return __lsx_vavg_d(_1, _2); } +// CHECK-LABEL: @vavg_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavg.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16u8 vavg_bu(v16u8 _1, v16u8 _2) { return __lsx_vavg_bu(_1, _2); } +// CHECK-LABEL: @vavg_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavg.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8u16 vavg_hu(v8u16 _1, v8u16 _2) { return __lsx_vavg_hu(_1, _2); } +// CHECK-LABEL: @vavg_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavg.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4u32 vavg_wu(v4u32 _1, v4u32 _2) { 
return __lsx_vavg_wu(_1, _2); } +// CHECK-LABEL: @vavg_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavg.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2u64 vavg_du(v2u64 _1, v2u64 _2) { return __lsx_vavg_du(_1, _2); } +// CHECK-LABEL: @vavgr_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavgr.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vavgr_b(v16i8 _1, v16i8 _2) { return __lsx_vavgr_b(_1, _2); } +// CHECK-LABEL: @vavgr_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavgr.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vavgr_h(v8i16 _1, v8i16 _2) { return __lsx_vavgr_h(_1, _2); } +// CHECK-LABEL: @vavgr_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavgr.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vavgr_w(v4i32 _1, v4i32 _2) { return __lsx_vavgr_w(_1, _2); } +// CHECK-LABEL: @vavgr_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavgr.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vavgr_d(v2i64 _1, v2i64 _2) { return __lsx_vavgr_d(_1, _2); } +// CHECK-LABEL: @vavgr_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavgr.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16u8 vavgr_bu(v16u8 _1, v16u8 _2) { return __lsx_vavgr_bu(_1, _2); } +// CHECK-LABEL: @vavgr_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavgr.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8u16 vavgr_hu(v8u16 _1, v8u16 _2) { return __lsx_vavgr_hu(_1, _2); } +// CHECK-LABEL: @vavgr_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: 
[[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavgr.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4u32 vavgr_wu(v4u32 _1, v4u32 _2) { return __lsx_vavgr_wu(_1, _2); } +// CHECK-LABEL: @vavgr_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavgr.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2u64 vavgr_du(v2u64 _1, v2u64 _2) { return __lsx_vavgr_du(_1, _2); } +// CHECK-LABEL: @vssub_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssub.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vssub_b(v16i8 _1, v16i8 _2) { return __lsx_vssub_b(_1, _2); } +// CHECK-LABEL: @vssub_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssub.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vssub_h(v8i16 _1, v8i16 _2) { return __lsx_vssub_h(_1, _2); } +// CHECK-LABEL: @vssub_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssub.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vssub_w(v4i32 _1, v4i32 _2) { return __lsx_vssub_w(_1, _2); } +// CHECK-LABEL: @vssub_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssub.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vssub_d(v2i64 _1, v2i64 _2) { return __lsx_vssub_d(_1, _2); } +// CHECK-LABEL: @vssub_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssub.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16u8 vssub_bu(v16u8 _1, v16u8 _2) { return __lsx_vssub_bu(_1, _2); } +// CHECK-LABEL: @vssub_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast 
i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssub.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8u16 vssub_hu(v8u16 _1, v8u16 _2) { return __lsx_vssub_hu(_1, _2); } +// CHECK-LABEL: @vssub_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssub.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4u32 vssub_wu(v4u32 _1, v4u32 _2) { return __lsx_vssub_wu(_1, _2); } +// CHECK-LABEL: @vssub_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssub.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2u64 vssub_du(v2u64 _1, v2u64 _2) { return __lsx_vssub_du(_1, _2); } +// CHECK-LABEL: @vabsd_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vabsd.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vabsd_b(v16i8 _1, v16i8 _2) { return __lsx_vabsd_b(_1, _2); } +// CHECK-LABEL: @vabsd_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vabsd.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vabsd_h(v8i16 _1, v8i16 _2) { return __lsx_vabsd_h(_1, _2); } +// CHECK-LABEL: @vabsd_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vabsd.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vabsd_w(v4i32 _1, v4i32 _2) { return __lsx_vabsd_w(_1, _2); } +// CHECK-LABEL: @vabsd_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vabsd.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vabsd_d(v2i64 _1, v2i64 _2) { return __lsx_vabsd_d(_1, _2); } +// CHECK-LABEL: @vabsd_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> 
@llvm.loongarch.lsx.vabsd.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16u8 vabsd_bu(v16u8 _1, v16u8 _2) { return __lsx_vabsd_bu(_1, _2); } +// CHECK-LABEL: @vabsd_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vabsd.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8u16 vabsd_hu(v8u16 _1, v8u16 _2) { return __lsx_vabsd_hu(_1, _2); } +// CHECK-LABEL: @vabsd_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vabsd.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4u32 vabsd_wu(v4u32 _1, v4u32 _2) { return __lsx_vabsd_wu(_1, _2); } +// CHECK-LABEL: @vabsd_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vabsd.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2u64 vabsd_du(v2u64 _1, v2u64 _2) { return __lsx_vabsd_du(_1, _2); } +// CHECK-LABEL: @vmul_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmul.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vmul_b(v16i8 _1, v16i8 _2) { return __lsx_vmul_b(_1, _2); } +// CHECK-LABEL: @vmul_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmul.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vmul_h(v8i16 _1, v8i16 _2) { return __lsx_vmul_h(_1, _2); } +// CHECK-LABEL: @vmul_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmul.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vmul_w(v4i32 _1, v4i32 _2) { return __lsx_vmul_w(_1, _2); } +// CHECK-LABEL: @vmul_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmul.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 
x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vmul_d(v2i64 _1, v2i64 _2) { return __lsx_vmul_d(_1, _2); } +// CHECK-LABEL: @vmadd_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmadd.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to i128 +// CHECK-NEXT: ret i128 [[TMP4]] +// +v16i8 vmadd_b(v16i8 _1, v16i8 _2, v16i8 _3) { + return __lsx_vmadd_b(_1, _2, _3); +} +// CHECK-LABEL: @vmadd_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmadd.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 +// CHECK-NEXT: ret i128 [[TMP4]] +// +v8i16 vmadd_h(v8i16 _1, v8i16 _2, v8i16 _3) { + return __lsx_vmadd_h(_1, _2, _3); +} +// CHECK-LABEL: @vmadd_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmadd.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 +// CHECK-NEXT: ret i128 [[TMP4]] +// +v4i32 vmadd_w(v4i32 _1, v4i32 _2, v4i32 _3) { + return __lsx_vmadd_w(_1, _2, _3); +} +// CHECK-LABEL: @vmadd_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmadd.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 +// CHECK-NEXT: ret i128 [[TMP4]] +// +v2i64 vmadd_d(v2i64 _1, v2i64 _2, v2i64 _3) { + return __lsx_vmadd_d(_1, _2, _3); +} +// CHECK-LABEL: @vmsub_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmsub.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to i128 +// CHECK-NEXT: ret i128 [[TMP4]] +// +v16i8 vmsub_b(v16i8 _1, v16i8 _2, v16i8 _3) { + return __lsx_vmsub_b(_1, _2, _3); +} +// CHECK-LABEL: @vmsub_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmsub.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> 
[[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 +// CHECK-NEXT: ret i128 [[TMP4]] +// +v8i16 vmsub_h(v8i16 _1, v8i16 _2, v8i16 _3) { + return __lsx_vmsub_h(_1, _2, _3); +} +// CHECK-LABEL: @vmsub_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmsub.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 +// CHECK-NEXT: ret i128 [[TMP4]] +// +v4i32 vmsub_w(v4i32 _1, v4i32 _2, v4i32 _3) { + return __lsx_vmsub_w(_1, _2, _3); +} +// CHECK-LABEL: @vmsub_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmsub.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 +// CHECK-NEXT: ret i128 [[TMP4]] +// +v2i64 vmsub_d(v2i64 _1, v2i64 _2, v2i64 _3) { + return __lsx_vmsub_d(_1, _2, _3); +} +// CHECK-LABEL: @vdiv_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vdiv.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vdiv_b(v16i8 _1, v16i8 _2) { return __lsx_vdiv_b(_1, _2); } +// CHECK-LABEL: @vdiv_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vdiv.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vdiv_h(v8i16 _1, v8i16 _2) { return __lsx_vdiv_h(_1, _2); } +// CHECK-LABEL: @vdiv_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vdiv.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vdiv_w(v4i32 _1, v4i32 _2) { return __lsx_vdiv_w(_1, _2); } +// CHECK-LABEL: @vdiv_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vdiv.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vdiv_d(v2i64 _1, v2i64 _2) { return __lsx_vdiv_d(_1, _2); } +// CHECK-LABEL: @vdiv_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 
x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vdiv.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16u8 vdiv_bu(v16u8 _1, v16u8 _2) { return __lsx_vdiv_bu(_1, _2); } +// CHECK-LABEL: @vdiv_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vdiv.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8u16 vdiv_hu(v8u16 _1, v8u16 _2) { return __lsx_vdiv_hu(_1, _2); } +// CHECK-LABEL: @vdiv_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vdiv.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4u32 vdiv_wu(v4u32 _1, v4u32 _2) { return __lsx_vdiv_wu(_1, _2); } +// CHECK-LABEL: @vdiv_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vdiv.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2u64 vdiv_du(v2u64 _1, v2u64 _2) { return __lsx_vdiv_du(_1, _2); } +// CHECK-LABEL: @vhaddw_h_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhaddw.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vhaddw_h_b(v16i8 _1, v16i8 _2) { return __lsx_vhaddw_h_b(_1, _2); } +// CHECK-LABEL: @vhaddw_w_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhaddw.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vhaddw_w_h(v8i16 _1, v8i16 _2) { return __lsx_vhaddw_w_h(_1, _2); } +// CHECK-LABEL: @vhaddw_d_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vhaddw_d_w(v4i32 _1, v4i32 _2) { return __lsx_vhaddw_d_w(_1, _2); } +// CHECK-LABEL: @vhaddw_hu_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> 
@llvm.loongarch.lsx.vhaddw.hu.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8u16 vhaddw_hu_bu(v16u8 _1, v16u8 _2) { return __lsx_vhaddw_hu_bu(_1, _2); } +// CHECK-LABEL: @vhaddw_wu_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhaddw.wu.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4u32 vhaddw_wu_hu(v8u16 _1, v8u16 _2) { return __lsx_vhaddw_wu_hu(_1, _2); } +// CHECK-LABEL: @vhaddw_du_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.du.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2u64 vhaddw_du_wu(v4u32 _1, v4u32 _2) { return __lsx_vhaddw_du_wu(_1, _2); } +// CHECK-LABEL: @vhsubw_h_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhsubw.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vhsubw_h_b(v16i8 _1, v16i8 _2) { return __lsx_vhsubw_h_b(_1, _2); } +// CHECK-LABEL: @vhsubw_w_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhsubw.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vhsubw_w_h(v8i16 _1, v8i16 _2) { return __lsx_vhsubw_w_h(_1, _2); } +// CHECK-LABEL: @vhsubw_d_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vhsubw_d_w(v4i32 _1, v4i32 _2) { return __lsx_vhsubw_d_w(_1, _2); } +// CHECK-LABEL: @vhsubw_hu_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhsubw.hu.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vhsubw_hu_bu(v16u8 _1, v16u8 _2) { return __lsx_vhsubw_hu_bu(_1, _2); } +// CHECK-LABEL: @vhsubw_wu_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x 
i32> @llvm.loongarch.lsx.vhsubw.wu.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vhsubw_wu_hu(v8u16 _1, v8u16 _2) { return __lsx_vhsubw_wu_hu(_1, _2); } +// CHECK-LABEL: @vhsubw_du_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.du.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vhsubw_du_wu(v4u32 _1, v4u32 _2) { return __lsx_vhsubw_du_wu(_1, _2); } +// CHECK-LABEL: @vmod_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmod.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vmod_b(v16i8 _1, v16i8 _2) { return __lsx_vmod_b(_1, _2); } +// CHECK-LABEL: @vmod_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmod.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vmod_h(v8i16 _1, v8i16 _2) { return __lsx_vmod_h(_1, _2); } +// CHECK-LABEL: @vmod_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmod.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vmod_w(v4i32 _1, v4i32 _2) { return __lsx_vmod_w(_1, _2); } +// CHECK-LABEL: @vmod_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmod.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vmod_d(v2i64 _1, v2i64 _2) { return __lsx_vmod_d(_1, _2); } +// CHECK-LABEL: @vmod_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmod.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16u8 vmod_bu(v16u8 _1, v16u8 _2) { return __lsx_vmod_bu(_1, _2); } +// CHECK-LABEL: @vmod_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmod.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: 
[[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8u16 vmod_hu(v8u16 _1, v8u16 _2) { return __lsx_vmod_hu(_1, _2); } +// CHECK-LABEL: @vmod_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmod.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4u32 vmod_wu(v4u32 _1, v4u32 _2) { return __lsx_vmod_wu(_1, _2); } +// CHECK-LABEL: @vmod_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmod.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2u64 vmod_du(v2u64 _1, v2u64 _2) { return __lsx_vmod_du(_1, _2); } +// CHECK-LABEL: @vreplve_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vreplve.b(<16 x i8> [[TMP0]], i32 [[_2:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v16i8 vreplve_b(v16i8 _1, int _2) { return __lsx_vreplve_b(_1, _2); } +// CHECK-LABEL: @vreplve_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vreplve.h(<8 x i16> [[TMP0]], i32 [[_2:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v8i16 vreplve_h(v8i16 _1, int _2) { return __lsx_vreplve_h(_1, _2); } +// CHECK-LABEL: @vreplve_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vreplve.w(<4 x i32> [[TMP0]], i32 [[_2:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4i32 vreplve_w(v4i32 _1, int _2) { return __lsx_vreplve_w(_1, _2); } +// CHECK-LABEL: @vreplve_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vreplve.d(<2 x i64> [[TMP0]], i32 [[_2:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2i64 vreplve_d(v2i64 _1, int _2) { return __lsx_vreplve_d(_1, _2); } +// CHECK-LABEL: @vreplvei_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vreplvei.b(<16 x i8> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v16i8 vreplvei_b(v16i8 _1) { return __lsx_vreplvei_b(_1, 1); } +// CHECK-LABEL: @vreplvei_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vreplvei.h(<8 x i16> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 
[[TMP2]] +// +v8i16 vreplvei_h(v8i16 _1) { return __lsx_vreplvei_h(_1, 1); } +// CHECK-LABEL: @vreplvei_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vreplvei.w(<4 x i32> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4i32 vreplvei_w(v4i32 _1) { return __lsx_vreplvei_w(_1, 1); } +// CHECK-LABEL: @vreplvei_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vreplvei.d(<2 x i64> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2i64 vreplvei_d(v2i64 _1) { return __lsx_vreplvei_d(_1, 1); } +// CHECK-LABEL: @vpickev_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpickev.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vpickev_b(v16i8 _1, v16i8 _2) { return __lsx_vpickev_b(_1, _2); } +// CHECK-LABEL: @vpickev_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpickev.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vpickev_h(v8i16 _1, v8i16 _2) { return __lsx_vpickev_h(_1, _2); } +// CHECK-LABEL: @vpickev_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpickev.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vpickev_w(v4i32 _1, v4i32 _2) { return __lsx_vpickev_w(_1, _2); } +// CHECK-LABEL: @vpickev_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpickev.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vpickev_d(v2i64 _1, v2i64 _2) { return __lsx_vpickev_d(_1, _2); } +// CHECK-LABEL: @vpickod_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpickod.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vpickod_b(v16i8 _1, v16i8 _2) { return __lsx_vpickod_b(_1, _2); } +// CHECK-LABEL: @vpickod_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast 
i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpickod.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vpickod_h(v8i16 _1, v8i16 _2) { return __lsx_vpickod_h(_1, _2); } +// CHECK-LABEL: @vpickod_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpickod.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vpickod_w(v4i32 _1, v4i32 _2) { return __lsx_vpickod_w(_1, _2); } +// CHECK-LABEL: @vpickod_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpickod.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vpickod_d(v2i64 _1, v2i64 _2) { return __lsx_vpickod_d(_1, _2); } +// CHECK-LABEL: @vilvh_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vilvh.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vilvh_b(v16i8 _1, v16i8 _2) { return __lsx_vilvh_b(_1, _2); } +// CHECK-LABEL: @vilvh_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vilvh.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vilvh_h(v8i16 _1, v8i16 _2) { return __lsx_vilvh_h(_1, _2); } +// CHECK-LABEL: @vilvh_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vilvh.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vilvh_w(v4i32 _1, v4i32 _2) { return __lsx_vilvh_w(_1, _2); } +// CHECK-LABEL: @vilvh_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vilvh.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vilvh_d(v2i64 _1, v2i64 _2) { return __lsx_vilvh_d(_1, _2); } +// CHECK-LABEL: @vilvl_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> 
@llvm.loongarch.lsx.vilvl.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vilvl_b(v16i8 _1, v16i8 _2) { return __lsx_vilvl_b(_1, _2); } +// CHECK-LABEL: @vilvl_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vilvl.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vilvl_h(v8i16 _1, v8i16 _2) { return __lsx_vilvl_h(_1, _2); } +// CHECK-LABEL: @vilvl_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vilvl.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vilvl_w(v4i32 _1, v4i32 _2) { return __lsx_vilvl_w(_1, _2); } +// CHECK-LABEL: @vilvl_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vilvl.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vilvl_d(v2i64 _1, v2i64 _2) { return __lsx_vilvl_d(_1, _2); } +// CHECK-LABEL: @vpackev_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpackev.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vpackev_b(v16i8 _1, v16i8 _2) { return __lsx_vpackev_b(_1, _2); } +// CHECK-LABEL: @vpackev_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpackev.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vpackev_h(v8i16 _1, v8i16 _2) { return __lsx_vpackev_h(_1, _2); } +// CHECK-LABEL: @vpackev_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpackev.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vpackev_w(v4i32 _1, v4i32 _2) { return __lsx_vpackev_w(_1, _2); } +// CHECK-LABEL: @vpackev_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpackev.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// 
CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vpackev_d(v2i64 _1, v2i64 _2) { return __lsx_vpackev_d(_1, _2); } +// CHECK-LABEL: @vpackod_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpackod.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vpackod_b(v16i8 _1, v16i8 _2) { return __lsx_vpackod_b(_1, _2); } +// CHECK-LABEL: @vpackod_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpackod.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vpackod_h(v8i16 _1, v8i16 _2) { return __lsx_vpackod_h(_1, _2); } +// CHECK-LABEL: @vpackod_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpackod.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vpackod_w(v4i32 _1, v4i32 _2) { return __lsx_vpackod_w(_1, _2); } +// CHECK-LABEL: @vpackod_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpackod.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vpackod_d(v2i64 _1, v2i64 _2) { return __lsx_vpackod_d(_1, _2); } +// CHECK-LABEL: @vshuf_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vshuf.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 +// CHECK-NEXT: ret i128 [[TMP4]] +// +v8i16 vshuf_h(v8i16 _1, v8i16 _2, v8i16 _3) { + return __lsx_vshuf_h(_1, _2, _3); +} +// CHECK-LABEL: @vshuf_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vshuf.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 +// CHECK-NEXT: ret i128 [[TMP4]] +// +v4i32 vshuf_w(v4i32 _1, v4i32 _2, v4i32 _3) { + return __lsx_vshuf_w(_1, _2, _3); +} +// CHECK-LABEL: @vshuf_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 
[[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vshuf.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 +// CHECK-NEXT: ret i128 [[TMP4]] +// +v2i64 vshuf_d(v2i64 _1, v2i64 _2, v2i64 _3) { + return __lsx_vshuf_d(_1, _2, _3); +} +// CHECK-LABEL: @vand_v( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vand.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16u8 vand_v(v16u8 _1, v16u8 _2) { return __lsx_vand_v(_1, _2); } +// CHECK-LABEL: @vandi_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vandi.b(<16 x i8> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v16u8 vandi_b(v16u8 _1) { return __lsx_vandi_b(_1, 1); } +// CHECK-LABEL: @vor_v( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vor.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16u8 vor_v(v16u8 _1, v16u8 _2) { return __lsx_vor_v(_1, _2); } +// CHECK-LABEL: @vori_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vori.b(<16 x i8> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v16u8 vori_b(v16u8 _1) { return __lsx_vori_b(_1, 1); } +// CHECK-LABEL: @vnor_v( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vnor.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16u8 vnor_v(v16u8 _1, v16u8 _2) { return __lsx_vnor_v(_1, _2); } +// CHECK-LABEL: @vnori_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vnori.b(<16 x i8> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v16u8 vnori_b(v16u8 _1) { return __lsx_vnori_b(_1, 1); } +// CHECK-LABEL: @vxor_v( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vxor.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16u8 vxor_v(v16u8 _1, v16u8 _2) { return __lsx_vxor_v(_1, 
_2); } +// CHECK-LABEL: @vxori_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vxori.b(<16 x i8> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v16u8 vxori_b(v16u8 _1) { return __lsx_vxori_b(_1, 1); } +// CHECK-LABEL: @vbitsel_v( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitsel.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to i128 +// CHECK-NEXT: ret i128 [[TMP4]] +// +v16u8 vbitsel_v(v16u8 _1, v16u8 _2, v16u8 _3) { + return __lsx_vbitsel_v(_1, _2, _3); +} +// CHECK-LABEL: @vbitseli_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitseli.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16u8 vbitseli_b(v16u8 _1, v16u8 _2) { return __lsx_vbitseli_b(_1, _2, 1); } +// CHECK-LABEL: @vshuf4i_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vshuf4i.b(<16 x i8> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v16i8 vshuf4i_b(v16i8 _1) { return __lsx_vshuf4i_b(_1, 1); } +// CHECK-LABEL: @vshuf4i_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vshuf4i.h(<8 x i16> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v8i16 vshuf4i_h(v8i16 _1) { return __lsx_vshuf4i_h(_1, 1); } +// CHECK-LABEL: @vshuf4i_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vshuf4i.w(<4 x i32> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4i32 vshuf4i_w(v4i32 _1) { return __lsx_vshuf4i_w(_1, 1); } +// CHECK-LABEL: @vreplgr2vr_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vreplgr2vr.b(i32 [[_1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to i128 +// CHECK-NEXT: ret i128 [[TMP1]] +// +v16i8 vreplgr2vr_b(int _1) { return __lsx_vreplgr2vr_b(_1); } +// CHECK-LABEL: @vreplgr2vr_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vreplgr2vr.h(i32 [[_1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to i128 +// CHECK-NEXT: ret i128 [[TMP1]] +// +v8i16 vreplgr2vr_h(int _1) { return __lsx_vreplgr2vr_h(_1); } +// CHECK-LABEL: @vreplgr2vr_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vreplgr2vr.w(i32 [[_1:%.*]]) +// CHECK-NEXT: 
+// CHECK-NEXT: ret i128 [[TMP1]]
+//
+v4i32 vreplgr2vr_w(int _1) { return __lsx_vreplgr2vr_w(_1); }
+// CHECK-LABEL: @vreplgr2vr_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vreplgr2vr.d(i64 [[_1:%.*]])
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to i128
+// CHECK-NEXT: ret i128 [[TMP1]]
+//
+v2i64 vreplgr2vr_d(long _1) { return __lsx_vreplgr2vr_d(_1); }
+// CHECK-LABEL: @vpcnt_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpcnt.b(<16 x i8> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v16i8 vpcnt_b(v16i8 _1) { return __lsx_vpcnt_b(_1); }
+// CHECK-LABEL: @vpcnt_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpcnt.h(<8 x i16> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v8i16 vpcnt_h(v8i16 _1) { return __lsx_vpcnt_h(_1); }
+// CHECK-LABEL: @vpcnt_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpcnt.w(<4 x i32> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v4i32 vpcnt_w(v4i32 _1) { return __lsx_vpcnt_w(_1); }
+// CHECK-LABEL: @vpcnt_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpcnt.d(<2 x i64> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2i64 vpcnt_d(v2i64 _1) { return __lsx_vpcnt_d(_1); }
+// CHECK-LABEL: @vclo_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vclo.b(<16 x i8> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v16i8 vclo_b(v16i8 _1) { return __lsx_vclo_b(_1); }
+// CHECK-LABEL: @vclo_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vclo.h(<8 x i16> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v8i16 vclo_h(v8i16 _1) { return __lsx_vclo_h(_1); }
+// CHECK-LABEL: @vclo_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vclo.w(<4 x i32> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v4i32 vclo_w(v4i32 _1) { return __lsx_vclo_w(_1); }
+// CHECK-LABEL: @vclo_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vclo.d(<2 x i64> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2i64 vclo_d(v2i64 _1) { return __lsx_vclo_d(_1); }
+// CHECK-LABEL: @vclz_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vclz.b(<16 x i8> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v16i8 vclz_b(v16i8 _1) { return __lsx_vclz_b(_1); }
+// CHECK-LABEL: @vclz_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vclz.h(<8 x i16> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v8i16 vclz_h(v8i16 _1) { return __lsx_vclz_h(_1); }
+// CHECK-LABEL: @vclz_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vclz.w(<4 x i32> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v4i32 vclz_w(v4i32 _1) { return __lsx_vclz_w(_1); }
+// CHECK-LABEL: @vclz_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vclz.d(<2 x i64> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2i64 vclz_d(v2i64 _1) { return __lsx_vclz_d(_1); }
+// CHECK-LABEL: @vpickve2gr_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.b(<16 x i8> [[TMP0]], i32 1)
+// CHECK-NEXT: ret i32 [[TMP1]]
+//
+int vpickve2gr_b(v16i8 _1) { return __lsx_vpickve2gr_b(_1, 1); }
+// CHECK-LABEL: @vpickve2gr_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.h(<8 x i16> [[TMP0]], i32 1)
+// CHECK-NEXT: ret i32 [[TMP1]]
+//
+int vpickve2gr_h(v8i16 _1) { return __lsx_vpickve2gr_h(_1, 1); }
+// CHECK-LABEL: @vpickve2gr_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.w(<4 x i32> [[TMP0]], i32 1)
+// CHECK-NEXT: ret i32 [[TMP1]]
+//
+int vpickve2gr_w(v4i32 _1) { return __lsx_vpickve2gr_w(_1, 1); }
+// CHECK-LABEL: @vpickve2gr_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.loongarch.lsx.vpickve2gr.d(<2 x i64> [[TMP0]], i32 1)
+// CHECK-NEXT: ret i64 [[TMP1]]
+//
+long vpickve2gr_d(v2i64 _1) { return __lsx_vpickve2gr_d(_1, 1); }
+// CHECK-LABEL: @vpickve2gr_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.bu(<16 x i8> [[TMP0]], i32 1)
+// CHECK-NEXT: ret i32 [[TMP1]]
+//
+unsigned int vpickve2gr_bu(v16i8 _1) { return __lsx_vpickve2gr_bu(_1, 1); }
+// CHECK-LABEL: @vpickve2gr_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.hu(<8 x i16> [[TMP0]], i32 1)
+// CHECK-NEXT: ret i32 [[TMP1]]
+//
+unsigned int vpickve2gr_hu(v8i16 _1) { return __lsx_vpickve2gr_hu(_1, 1); }
+// CHECK-LABEL: @vpickve2gr_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.wu(<4 x i32> [[TMP0]], i32 1)
+// CHECK-NEXT: ret i32 [[TMP1]]
+//
+unsigned int vpickve2gr_wu(v4i32 _1) { return __lsx_vpickve2gr_wu(_1, 1); }
+// CHECK-LABEL: @vpickve2gr_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.loongarch.lsx.vpickve2gr.du(<2 x i64> [[TMP0]], i32 1)
+// CHECK-NEXT: ret i64 [[TMP1]]
+//
+unsigned long int vpickve2gr_du(v2i64 _1) { return __lsx_vpickve2gr_du(_1, 1); }
+// CHECK-LABEL: @vinsgr2vr_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vinsgr2vr.b(<16 x i8> [[TMP0]], i32 1, i32 1)
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v16i8 vinsgr2vr_b(v16i8 _1) { return __lsx_vinsgr2vr_b(_1, 1, 1); }
+// CHECK-LABEL: @vinsgr2vr_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vinsgr2vr.h(<8 x i16> [[TMP0]], i32 1, i32 1)
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v8i16 vinsgr2vr_h(v8i16 _1) { return __lsx_vinsgr2vr_h(_1, 1, 1); }
+// CHECK-LABEL: @vinsgr2vr_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vinsgr2vr.w(<4 x i32> [[TMP0]], i32 1, i32 1)
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v4i32 vinsgr2vr_w(v4i32 _1) { return __lsx_vinsgr2vr_w(_1, 1, 1); }
+// CHECK-LABEL: @vinsgr2vr_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vinsgr2vr.d(<2 x i64> [[TMP0]], i64 1, i32 1)
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2i64 vinsgr2vr_d(v2i64 _1) { return __lsx_vinsgr2vr_d(_1, 1, 1); }
+// CHECK-LABEL: @vfadd_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfadd.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4f32 vfadd_s(v4f32 _1, v4f32 _2) { return __lsx_vfadd_s(_1, _2); }
+// CHECK-LABEL: @vfadd_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfadd.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2f64 vfadd_d(v2f64 _1, v2f64 _2) { return __lsx_vfadd_d(_1, _2); }
+// CHECK-LABEL: @vfsub_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfsub.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4f32 vfsub_s(v4f32 _1, v4f32 _2) { return __lsx_vfsub_s(_1, _2); }
+// CHECK-LABEL: @vfsub_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfsub.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2f64 vfsub_d(v2f64 _1, v2f64 _2) { return __lsx_vfsub_d(_1, _2); }
+// CHECK-LABEL: @vfmul_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmul.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4f32 vfmul_s(v4f32 _1, v4f32 _2) { return __lsx_vfmul_s(_1, _2); }
+// CHECK-LABEL: @vfmul_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmul.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2f64 vfmul_d(v2f64 _1, v2f64 _2) { return __lsx_vfmul_d(_1, _2); }
+// CHECK-LABEL: @vfdiv_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfdiv.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4f32 vfdiv_s(v4f32 _1, v4f32 _2) { return __lsx_vfdiv_s(_1, _2); }
+// CHECK-LABEL: @vfdiv_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfdiv.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2f64 vfdiv_d(v2f64 _1, v2f64 _2) { return __lsx_vfdiv_d(_1, _2); }
+// CHECK-LABEL: @vfcvt_h_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vfcvt.h.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8i16 vfcvt_h_s(v4f32 _1, v4f32 _2) { return __lsx_vfcvt_h_s(_1, _2); }
+// CHECK-LABEL: @vfcvt_s_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfcvt.s.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4f32 vfcvt_s_d(v2f64 _1, v2f64 _2) { return __lsx_vfcvt_s_d(_1, _2); }
+// CHECK-LABEL: @vfmin_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmin.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4f32 vfmin_s(v4f32 _1, v4f32 _2) { return __lsx_vfmin_s(_1, _2); }
+// CHECK-LABEL: @vfmin_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmin.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2f64 vfmin_d(v2f64 _1, v2f64 _2) { return __lsx_vfmin_d(_1, _2); }
+// CHECK-LABEL: @vfmina_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmina.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4f32 vfmina_s(v4f32 _1, v4f32 _2) { return __lsx_vfmina_s(_1, _2); }
+// CHECK-LABEL: @vfmina_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmina.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2f64 vfmina_d(v2f64 _1, v2f64 _2) { return __lsx_vfmina_d(_1, _2); }
+// CHECK-LABEL: @vfmax_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmax.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4f32 vfmax_s(v4f32 _1, v4f32 _2) { return __lsx_vfmax_s(_1, _2); }
+// CHECK-LABEL: @vfmax_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmax.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2f64 vfmax_d(v2f64 _1, v2f64 _2) { return __lsx_vfmax_d(_1, _2); }
+// CHECK-LABEL: @vfmaxa_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmaxa.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4f32 vfmaxa_s(v4f32 _1, v4f32 _2) { return __lsx_vfmaxa_s(_1, _2); }
+// CHECK-LABEL: @vfmaxa_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmaxa.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2f64 vfmaxa_d(v2f64 _1, v2f64 _2) { return __lsx_vfmaxa_d(_1, _2); }
+// CHECK-LABEL: @vfclass_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfclass.s(<4 x float> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v4i32 vfclass_s(v4f32 _1) { return __lsx_vfclass_s(_1); }
+// CHECK-LABEL: @vfclass_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfclass.d(<2 x double> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2i64 vfclass_d(v2f64 _1) { return __lsx_vfclass_d(_1); }
+// CHECK-LABEL: @vfsqrt_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfsqrt.s(<4 x float> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v4f32 vfsqrt_s(v4f32 _1) { return __lsx_vfsqrt_s(_1); }
+// CHECK-LABEL: @vfsqrt_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfsqrt.d(<2 x double> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2f64 vfsqrt_d(v2f64 _1) { return __lsx_vfsqrt_d(_1); }
+// CHECK-LABEL: @vfrecip_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrecip.s(<4 x float> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v4f32 vfrecip_s(v4f32 _1) { return __lsx_vfrecip_s(_1); }
+// CHECK-LABEL: @vfrecip_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrecip.d(<2 x double> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2f64 vfrecip_d(v2f64 _1) { return __lsx_vfrecip_d(_1); }
+// CHECK-LABEL: @vfrint_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrint.s(<4 x float> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v4f32 vfrint_s(v4f32 _1) { return __lsx_vfrint_s(_1); }
+// CHECK-LABEL: @vfrint_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrint.d(<2 x double> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2f64 vfrint_d(v2f64 _1) { return __lsx_vfrint_d(_1); }
+// CHECK-LABEL: @vfrsqrt_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrsqrt.s(<4 x float> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v4f32 vfrsqrt_s(v4f32 _1) { return __lsx_vfrsqrt_s(_1); }
+// CHECK-LABEL: @vfrsqrt_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrsqrt.d(<2 x double> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2f64 vfrsqrt_d(v2f64 _1) { return __lsx_vfrsqrt_d(_1); }
+// CHECK-LABEL: @vflogb_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vflogb.s(<4 x float> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v4f32 vflogb_s(v4f32 _1) { return __lsx_vflogb_s(_1); }
+// CHECK-LABEL: @vflogb_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vflogb.d(<2 x double> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2f64 vflogb_d(v2f64 _1) { return __lsx_vflogb_d(_1); }
+// CHECK-LABEL: @vfcvth_s_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfcvth.s.h(<8 x i16> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v4f32 vfcvth_s_h(v8i16 _1) { return __lsx_vfcvth_s_h(_1); }
+// CHECK-LABEL: @vfcvth_d_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfcvth.d.s(<4 x float> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2f64 vfcvth_d_s(v4f32 _1) { return __lsx_vfcvth_d_s(_1); }
+// CHECK-LABEL: @vfcvtl_s_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfcvtl.s.h(<8 x i16> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v4f32 vfcvtl_s_h(v8i16 _1) { return __lsx_vfcvtl_s_h(_1); }
+// CHECK-LABEL: @vfcvtl_d_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfcvtl.d.s(<4 x float> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2f64 vfcvtl_d_s(v4f32 _1) { return __lsx_vfcvtl_d_s(_1); }
+// CHECK-LABEL: @vftint_w_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftint.w.s(<4 x float> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v4i32 vftint_w_s(v4f32 _1) { return __lsx_vftint_w_s(_1); }
+// CHECK-LABEL: @vftint_l_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftint.l.d(<2 x double> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2i64 vftint_l_d(v2f64 _1) { return __lsx_vftint_l_d(_1); }
+// CHECK-LABEL: @vftint_wu_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftint.wu.s(<4 x float> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v4u32 vftint_wu_s(v4f32 _1) { return __lsx_vftint_wu_s(_1); }
+// CHECK-LABEL: @vftint_lu_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftint.lu.d(<2 x double> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2u64 vftint_lu_d(v2f64 _1) { return __lsx_vftint_lu_d(_1); }
+// CHECK-LABEL: @vftintrz_w_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.w.s(<4 x float> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v4i32 vftintrz_w_s(v4f32 _1) { return __lsx_vftintrz_w_s(_1); }
+// CHECK-LABEL: @vftintrz_l_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrz.l.d(<2 x double> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2i64 vftintrz_l_d(v2f64 _1) { return __lsx_vftintrz_l_d(_1); }
+// CHECK-LABEL: @vftintrz_wu_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.wu.s(<4 x float> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v4u32 vftintrz_wu_s(v4f32 _1) { return __lsx_vftintrz_wu_s(_1); }
+// CHECK-LABEL: @vftintrz_lu_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrz.lu.d(<2 x double> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2u64 vftintrz_lu_d(v2f64 _1) { return __lsx_vftintrz_lu_d(_1); }
+// CHECK-LABEL: @vffint_s_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.w(<4 x i32> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v4f32 vffint_s_w(v4i32 _1) { return __lsx_vffint_s_w(_1); }
+// CHECK-LABEL: @vffint_d_l(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffint.d.l(<2 x i64> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2f64 vffint_d_l(v2i64 _1) { return __lsx_vffint_d_l(_1); }
+// CHECK-LABEL: @vffint_s_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.wu(<4 x i32> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v4f32 vffint_s_wu(v4u32 _1) { return __lsx_vffint_s_wu(_1); }
+// CHECK-LABEL: @vffint_d_lu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffint.d.lu(<2 x i64> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2f64 vffint_d_lu(v2u64 _1) { return __lsx_vffint_d_lu(_1); }
+// CHECK-LABEL: @vandn_v(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vandn.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v16u8 vandn_v(v16u8 _1, v16u8 _2) { return __lsx_vandn_v(_1, _2); }
+// CHECK-LABEL: @vneg_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vneg.b(<16 x i8> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v16i8 vneg_b(v16i8 _1) { return __lsx_vneg_b(_1); }
+// CHECK-LABEL: @vneg_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vneg.h(<8 x i16> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v8i16 vneg_h(v8i16 _1) { return __lsx_vneg_h(_1); }
+// CHECK-LABEL: @vneg_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vneg.w(<4 x i32> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v4i32 vneg_w(v4i32 _1) { return __lsx_vneg_w(_1); }
+// CHECK-LABEL: @vneg_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vneg.d(<2 x i64> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2i64 vneg_d(v2i64 _1) { return __lsx_vneg_d(_1); }
+// CHECK-LABEL: @vmuh_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmuh.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v16i8 vmuh_b(v16i8 _1, v16i8 _2) { return __lsx_vmuh_b(_1, _2); }
+// CHECK-LABEL: @vmuh_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmuh.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8i16 vmuh_h(v8i16 _1, v8i16 _2) { return __lsx_vmuh_h(_1, _2); }
+// CHECK-LABEL: @vmuh_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmuh.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vmuh_w(v4i32 _1, v4i32 _2) { return __lsx_vmuh_w(_1, _2); }
+// CHECK-LABEL: @vmuh_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmuh.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vmuh_d(v2i64 _1, v2i64 _2) { return __lsx_vmuh_d(_1, _2); }
+// CHECK-LABEL: @vmuh_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmuh.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v16u8 vmuh_bu(v16u8 _1, v16u8 _2) { return __lsx_vmuh_bu(_1, _2); }
+// CHECK-LABEL: @vmuh_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmuh.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8u16 vmuh_hu(v8u16 _1, v8u16 _2) { return __lsx_vmuh_hu(_1, _2); }
+// CHECK-LABEL: @vmuh_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmuh.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4u32 vmuh_wu(v4u32 _1, v4u32 _2) { return __lsx_vmuh_wu(_1, _2); }
+// CHECK-LABEL: @vmuh_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmuh.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2u64 vmuh_du(v2u64 _1, v2u64 _2) { return __lsx_vmuh_du(_1, _2); }
+// CHECK-LABEL: @vsllwil_h_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsllwil.h.b(<16 x i8> [[TMP0]], i32 1)
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v8i16 vsllwil_h_b(v16i8 _1) { return __lsx_vsllwil_h_b(_1, 1); }
+// CHECK-LABEL: @vsllwil_w_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsllwil.w.h(<8 x i16> [[TMP0]], i32 1)
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v4i32 vsllwil_w_h(v8i16 _1) { return __lsx_vsllwil_w_h(_1, 1); }
+// CHECK-LABEL: @vsllwil_d_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsllwil.d.w(<4 x i32> [[TMP0]], i32 1)
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2i64 vsllwil_d_w(v4i32 _1) { return __lsx_vsllwil_d_w(_1, 1); }
+// CHECK-LABEL: @vsllwil_hu_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsllwil.hu.bu(<16 x i8> [[TMP0]], i32 1)
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v8u16 vsllwil_hu_bu(v16u8 _1) { return __lsx_vsllwil_hu_bu(_1, 1); }
+// CHECK-LABEL: @vsllwil_wu_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsllwil.wu.hu(<8 x i16> [[TMP0]], i32 1)
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v4u32 vsllwil_wu_hu(v8u16 _1) { return __lsx_vsllwil_wu_hu(_1, 1); }
+// CHECK-LABEL: @vsllwil_du_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsllwil.du.wu(<4 x i32> [[TMP0]], i32 1)
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2u64 vsllwil_du_wu(v4u32 _1) { return __lsx_vsllwil_du_wu(_1, 1); }
+// CHECK-LABEL: @vsran_b_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsran.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v16i8 vsran_b_h(v8i16 _1, v8i16 _2) { return __lsx_vsran_b_h(_1, _2); }
+// CHECK-LABEL: @vsran_h_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsran.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8i16 vsran_h_w(v4i32 _1, v4i32 _2) { return __lsx_vsran_h_w(_1, _2); }
+// CHECK-LABEL: @vsran_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsran.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vsran_w_d(v2i64 _1, v2i64 _2) { return __lsx_vsran_w_d(_1, _2); }
+// CHECK-LABEL: @vssran_b_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssran.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v16i8 vssran_b_h(v8i16 _1, v8i16 _2) { return __lsx_vssran_b_h(_1, _2); }
+// CHECK-LABEL: @vssran_h_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssran.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8i16 vssran_h_w(v4i32 _1, v4i32 _2) { return __lsx_vssran_h_w(_1, _2); }
+// CHECK-LABEL: @vssran_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssran.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vssran_w_d(v2i64 _1, v2i64 _2) { return __lsx_vssran_w_d(_1, _2); }
+// CHECK-LABEL: @vssran_bu_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssran.bu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v16u8 vssran_bu_h(v8u16 _1, v8u16 _2) { return __lsx_vssran_bu_h(_1, _2); }
+// CHECK-LABEL: @vssran_hu_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssran.hu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8u16 vssran_hu_w(v4u32 _1, v4u32 _2) { return __lsx_vssran_hu_w(_1, _2); }
+// CHECK-LABEL: @vssran_wu_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssran.wu.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4u32 vssran_wu_d(v2u64 _1, v2u64 _2) { return __lsx_vssran_wu_d(_1, _2); }
+// CHECK-LABEL: @vsrarn_b_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrarn.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v16i8 vsrarn_b_h(v8i16 _1, v8i16 _2) { return __lsx_vsrarn_b_h(_1, _2); }
+// CHECK-LABEL: @vsrarn_h_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrarn.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8i16 vsrarn_h_w(v4i32 _1, v4i32 _2) { return __lsx_vsrarn_h_w(_1, _2); }
+// CHECK-LABEL: @vsrarn_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrarn.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vsrarn_w_d(v2i64 _1, v2i64 _2) { return __lsx_vsrarn_w_d(_1, _2); }
+// CHECK-LABEL: @vssrarn_b_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarn.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v16i8 vssrarn_b_h(v8i16 _1, v8i16 _2) { return __lsx_vssrarn_b_h(_1, _2); }
+// CHECK-LABEL: @vssrarn_h_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarn.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8i16 vssrarn_h_w(v4i32 _1, v4i32 _2) { return __lsx_vssrarn_h_w(_1, _2); }
+// CHECK-LABEL: @vssrarn_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarn.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vssrarn_w_d(v2i64 _1, v2i64 _2) { return __lsx_vssrarn_w_d(_1, _2); }
+// CHECK-LABEL: @vssrarn_bu_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarn.bu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v16u8 vssrarn_bu_h(v8u16 _1, v8u16 _2) { return __lsx_vssrarn_bu_h(_1, _2); }
+// CHECK-LABEL: @vssrarn_hu_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarn.hu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8u16 vssrarn_hu_w(v4u32 _1, v4u32 _2) { return __lsx_vssrarn_hu_w(_1, _2); }
+// CHECK-LABEL: @vssrarn_wu_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarn.wu.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4u32 vssrarn_wu_d(v2u64 _1, v2u64 _2) { return __lsx_vssrarn_wu_d(_1, _2); }
+// CHECK-LABEL: @vsrln_b_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrln.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v16i8 vsrln_b_h(v8i16 _1, v8i16 _2) { return __lsx_vsrln_b_h(_1, _2); }
+// CHECK-LABEL: @vsrln_h_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrln.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8i16 vsrln_h_w(v4i32 _1, v4i32 _2) { return __lsx_vsrln_h_w(_1, _2); }
+// CHECK-LABEL: @vsrln_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrln.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vsrln_w_d(v2i64 _1, v2i64 _2) { return __lsx_vsrln_w_d(_1, _2); }
+// CHECK-LABEL: @vssrln_bu_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrln.bu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v16u8 vssrln_bu_h(v8u16 _1, v8u16 _2) { return __lsx_vssrln_bu_h(_1, _2); }
+// CHECK-LABEL: @vssrln_hu_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrln.hu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8u16 vssrln_hu_w(v4u32 _1, v4u32 _2) { return __lsx_vssrln_hu_w(_1, _2); }
+// CHECK-LABEL: @vssrln_wu_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrln.wu.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4u32 vssrln_wu_d(v2u64 _1, v2u64 _2) { return __lsx_vssrln_wu_d(_1, _2); }
+// CHECK-LABEL: @vsrlrn_b_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlrn.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v16i8 vsrlrn_b_h(v8i16 _1, v8i16 _2) { return __lsx_vsrlrn_b_h(_1, _2); }
+// CHECK-LABEL: @vsrlrn_h_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlrn.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8i16 vsrlrn_h_w(v4i32 _1, v4i32 _2) { return __lsx_vsrlrn_h_w(_1, _2); }
+// CHECK-LABEL: @vsrlrn_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlrn.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vsrlrn_w_d(v2i64 _1, v2i64 _2) { return __lsx_vsrlrn_w_d(_1, _2); }
+// CHECK-LABEL: @vssrlrn_bu_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrn.bu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v16u8 vssrlrn_bu_h(v8u16 _1, v8u16 _2) { return __lsx_vssrlrn_bu_h(_1, _2); }
+// CHECK-LABEL: @vssrlrn_hu_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrn.hu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8u16 vssrlrn_hu_w(v4u32 _1, v4u32 _2) { return __lsx_vssrlrn_hu_w(_1, _2); }
+// CHECK-LABEL: @vssrlrn_wu_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrn.wu.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4u32 vssrlrn_wu_d(v2u64 _1, v2u64 _2) { return __lsx_vssrlrn_wu_d(_1, _2); }
+// CHECK-LABEL: @vfrstpi_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vfrstpi.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v16i8 vfrstpi_b(v16i8 _1, v16i8 _2) { return __lsx_vfrstpi_b(_1, _2, 1); }
+// CHECK-LABEL: @vfrstpi_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vfrstpi.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8i16 vfrstpi_h(v8i16 _1, v8i16 _2) { return __lsx_vfrstpi_h(_1, _2, 1); }
+// CHECK-LABEL: @vfrstp_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vfrstp.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to i128
+// CHECK-NEXT: ret i128 [[TMP4]]
+//
+v16i8 vfrstp_b(v16i8 _1, v16i8 _2, v16i8 _3) {
+ return __lsx_vfrstp_b(_1, _2, _3);
+}
+// CHECK-LABEL: @vfrstp_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vfrstp.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
+// CHECK-NEXT: ret i128 [[TMP4]]
+//
+v8i16 vfrstp_h(v8i16 _1, v8i16 _2, v8i16 _3) {
+ return __lsx_vfrstp_h(_1, _2, _3);
+}
+// CHECK-LABEL: @vshuf4i_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vshuf4i.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vshuf4i_d(v2i64 _1, v2i64 _2) { return __lsx_vshuf4i_d(_1, _2, 1); }
+// CHECK-LABEL: @vbsrl_v(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbsrl.v(<16 x i8> [[TMP0]], i32 1)
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v16i8 vbsrl_v(v16i8 _1) { return __lsx_vbsrl_v(_1, 1); }
+// CHECK-LABEL: @vbsll_v(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbsll.v(<16 x i8> [[TMP0]], i32 1)
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v16i8 vbsll_v(v16i8 _1) { return __lsx_vbsll_v(_1, 1); }
+// CHECK-LABEL: @vextrins_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vextrins.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v16i8 vextrins_b(v16i8 _1, v16i8 _2) { return __lsx_vextrins_b(_1, _2, 1); }
+// CHECK-LABEL: @vextrins_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vextrins.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8i16 vextrins_h(v8i16 _1, v8i16 _2) { return __lsx_vextrins_h(_1, _2, 1); }
+// CHECK-LABEL: @vextrins_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vextrins.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vextrins_w(v4i32 _1, v4i32 _2) { return __lsx_vextrins_w(_1, _2, 1); }
+// CHECK-LABEL: @vextrins_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vextrins.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vextrins_d(v2i64 _1, v2i64 _2) { return __lsx_vextrins_d(_1, _2, 1); }
+// CHECK-LABEL: @vmskltz_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmskltz.b(<16 x i8> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v16i8 vmskltz_b(v16i8 _1) { return __lsx_vmskltz_b(_1); }
+// CHECK-LABEL: @vmskltz_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmskltz.h(<8 x i16> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v8i16 vmskltz_h(v8i16 _1) { return __lsx_vmskltz_h(_1); }
+// CHECK-LABEL: @vmskltz_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmskltz.w(<4 x i32> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v4i32 vmskltz_w(v4i32 _1) { return __lsx_vmskltz_w(_1); }
+// CHECK-LABEL: @vmskltz_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmskltz.d(<2 x i64> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2i64 vmskltz_d(v2i64 _1) { return __lsx_vmskltz_d(_1); } +// CHECK-LABEL: @vsigncov_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsigncov.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vsigncov_b(v16i8 _1, v16i8 _2) { return __lsx_vsigncov_b(_1, _2); } +// CHECK-LABEL: @vsigncov_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsigncov.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vsigncov_h(v8i16 _1, v8i16 _2) { return __lsx_vsigncov_h(_1, _2); } +// CHECK-LABEL: @vsigncov_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsigncov.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vsigncov_w(v4i32 _1, v4i32 _2) { return __lsx_vsigncov_w(_1, _2); } +// CHECK-LABEL: @vsigncov_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsigncov.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vsigncov_d(v2i64 _1, v2i64 _2) { return __lsx_vsigncov_d(_1, _2); } +// CHECK-LABEL: @vfmadd_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmadd.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]], <4 x float> [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to i128 +// CHECK-NEXT: ret i128 [[TMP4]] +// +v4f32 vfmadd_s(v4f32 _1, v4f32 _2, v4f32 _3) { + return __lsx_vfmadd_s(_1, _2, _3); +} +// CHECK-LABEL: @vfmadd_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmadd.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]], <2 x double> [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to i128 +// CHECK-NEXT: ret i128 [[TMP4]] +// +v2f64 vfmadd_d(v2f64 _1, v2f64 _2, v2f64 _3) { + return 
__lsx_vfmadd_d(_1, _2, _3); +} +// CHECK-LABEL: @vfmsub_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmsub.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]], <4 x float> [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to i128 +// CHECK-NEXT: ret i128 [[TMP4]] +// +v4f32 vfmsub_s(v4f32 _1, v4f32 _2, v4f32 _3) { + return __lsx_vfmsub_s(_1, _2, _3); +} +// CHECK-LABEL: @vfmsub_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmsub.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]], <2 x double> [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to i128 +// CHECK-NEXT: ret i128 [[TMP4]] +// +v2f64 vfmsub_d(v2f64 _1, v2f64 _2, v2f64 _3) { + return __lsx_vfmsub_d(_1, _2, _3); +} +// CHECK-LABEL: @vfnmadd_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfnmadd.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]], <4 x float> [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to i128 +// CHECK-NEXT: ret i128 [[TMP4]] +// +v4f32 vfnmadd_s(v4f32 _1, v4f32 _2, v4f32 _3) { + return __lsx_vfnmadd_s(_1, _2, _3); +} +// CHECK-LABEL: @vfnmadd_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfnmadd.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]], <2 x double> [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to i128 +// CHECK-NEXT: ret i128 [[TMP4]] +// +v2f64 vfnmadd_d(v2f64 _1, v2f64 _2, v2f64 _3) { + return __lsx_vfnmadd_d(_1, _2, _3); +} +// CHECK-LABEL: @vfnmsub_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfnmsub.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]], <4 x float> [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to i128 +// CHECK-NEXT: ret i128 [[TMP4]] +// +v4f32 vfnmsub_s(v4f32 _1, v4f32 _2, v4f32 _3) { + return __lsx_vfnmsub_s(_1, _2, _3); +} +// CHECK-LABEL: @vfnmsub_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfnmsub.d(<2 x 
double> [[TMP0]], <2 x double> [[TMP1]], <2 x double> [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to i128 +// CHECK-NEXT: ret i128 [[TMP4]] +// +v2f64 vfnmsub_d(v2f64 _1, v2f64 _2, v2f64 _3) { + return __lsx_vfnmsub_d(_1, _2, _3); +} +// CHECK-LABEL: @vftintrne_w_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrne.w.s(<4 x float> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4i32 vftintrne_w_s(v4f32 _1) { return __lsx_vftintrne_w_s(_1); } +// CHECK-LABEL: @vftintrne_l_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrne.l.d(<2 x double> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2i64 vftintrne_l_d(v2f64 _1) { return __lsx_vftintrne_l_d(_1); } +// CHECK-LABEL: @vftintrp_w_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrp.w.s(<4 x float> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4i32 vftintrp_w_s(v4f32 _1) { return __lsx_vftintrp_w_s(_1); } +// CHECK-LABEL: @vftintrp_l_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrp.l.d(<2 x double> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2i64 vftintrp_l_d(v2f64 _1) { return __lsx_vftintrp_l_d(_1); } +// CHECK-LABEL: @vftintrm_w_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrm.w.s(<4 x float> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4i32 vftintrm_w_s(v4f32 _1) { return __lsx_vftintrm_w_s(_1); } +// CHECK-LABEL: @vftintrm_l_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrm.l.d(<2 x double> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2i64 vftintrm_l_d(v2f64 _1) { return __lsx_vftintrm_l_d(_1); } +// CHECK-LABEL: @vftint_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftint.w.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vftint_w_d(v2f64 _1, v2f64 _2) { return __lsx_vftint_w_d(_1, _2); } +// CHECK-LABEL: @vffint_s_l( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.l(<2 x i64> [[TMP0]], <2 x i64> 
[[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4f32 vffint_s_l(v2i64 _1, v2i64 _2) { return __lsx_vffint_s_l(_1, _2); } +// CHECK-LABEL: @vftintrz_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.w.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vftintrz_w_d(v2f64 _1, v2f64 _2) { return __lsx_vftintrz_w_d(_1, _2); } +// CHECK-LABEL: @vftintrp_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrp.w.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vftintrp_w_d(v2f64 _1, v2f64 _2) { return __lsx_vftintrp_w_d(_1, _2); } +// CHECK-LABEL: @vftintrm_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrm.w.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vftintrm_w_d(v2f64 _1, v2f64 _2) { return __lsx_vftintrm_w_d(_1, _2); } +// CHECK-LABEL: @vftintrne_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrne.w.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vftintrne_w_d(v2f64 _1, v2f64 _2) { return __lsx_vftintrne_w_d(_1, _2); } +// CHECK-LABEL: @vftintl_l_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintl.l.s(<4 x float> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2i64 vftintl_l_s(v4f32 _1) { return __lsx_vftintl_l_s(_1); } +// CHECK-LABEL: @vftinth_l_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftinth.l.s(<4 x float> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2i64 vftinth_l_s(v4f32 _1) { return __lsx_vftinth_l_s(_1); } +// CHECK-LABEL: @vffinth_d_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffinth.d.w(<4 x i32> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2f64 vffinth_d_w(v4i32 _1) { return __lsx_vffinth_d_w(_1); } +// CHECK-LABEL: @vffintl_d_w( +// CHECK-NEXT: 
entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffintl.d.w(<4 x i32> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2f64 vffintl_d_w(v4i32 _1) { return __lsx_vffintl_d_w(_1); } +// CHECK-LABEL: @vftintrzl_l_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrzl.l.s(<4 x float> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2i64 vftintrzl_l_s(v4f32 _1) { return __lsx_vftintrzl_l_s(_1); } +// CHECK-LABEL: @vftintrzh_l_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrzh.l.s(<4 x float> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2i64 vftintrzh_l_s(v4f32 _1) { return __lsx_vftintrzh_l_s(_1); } +// CHECK-LABEL: @vftintrpl_l_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrpl.l.s(<4 x float> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2i64 vftintrpl_l_s(v4f32 _1) { return __lsx_vftintrpl_l_s(_1); } +// CHECK-LABEL: @vftintrph_l_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrph.l.s(<4 x float> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2i64 vftintrph_l_s(v4f32 _1) { return __lsx_vftintrph_l_s(_1); } +// CHECK-LABEL: @vftintrml_l_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrml.l.s(<4 x float> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2i64 vftintrml_l_s(v4f32 _1) { return __lsx_vftintrml_l_s(_1); } +// CHECK-LABEL: @vftintrmh_l_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrmh.l.s(<4 x float> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2i64 vftintrmh_l_s(v4f32 _1) { return __lsx_vftintrmh_l_s(_1); } +// CHECK-LABEL: @vftintrnel_l_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrnel.l.s(<4 x float> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2i64 vftintrnel_l_s(v4f32 _1) { return __lsx_vftintrnel_l_s(_1); } +// CHECK-LABEL: @vftintrneh_l_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrneh.l.s(<4 x float> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// 
CHECK-NEXT: ret i128 [[TMP2]] +// +v2i64 vftintrneh_l_s(v4f32 _1) { return __lsx_vftintrneh_l_s(_1); } +// CHECK-LABEL: @vfrintrne_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrne.s(<4 x float> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4i32 vfrintrne_s(v4f32 _1) { return __lsx_vfrintrne_s(_1); } +// CHECK-LABEL: @vfrintrne_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrne.d(<2 x double> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2i64 vfrintrne_d(v2f64 _1) { return __lsx_vfrintrne_d(_1); } +// CHECK-LABEL: @vfrintrz_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrz.s(<4 x float> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4i32 vfrintrz_s(v4f32 _1) { return __lsx_vfrintrz_s(_1); } +// CHECK-LABEL: @vfrintrz_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrz.d(<2 x double> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2i64 vfrintrz_d(v2f64 _1) { return __lsx_vfrintrz_d(_1); } +// CHECK-LABEL: @vfrintrp_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrp.s(<4 x float> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4i32 vfrintrp_s(v4f32 _1) { return __lsx_vfrintrp_s(_1); } +// CHECK-LABEL: @vfrintrp_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrp.d(<2 x double> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2i64 vfrintrp_d(v2f64 _1) { return __lsx_vfrintrp_d(_1); } +// CHECK-LABEL: @vfrintrm_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrm.s(<4 x float> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4i32 vfrintrm_s(v4f32 _1) { return __lsx_vfrintrm_s(_1); } +// CHECK-LABEL: @vfrintrm_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrm.d(<2 x double> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2i64 vfrintrm_d(v2f64 _1) { return __lsx_vfrintrm_d(_1); } +// CHECK-LABEL: @vstelm_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.b(<16 x i8> 
[[TMP0]], ptr [[_2:%.*]], i32 1, i32 1) +// CHECK-NEXT: ret void +// +void vstelm_b(v16i8 _1, void *_2) { return __lsx_vstelm_b(_1, _2, 1, 1); } +// CHECK-LABEL: @vstelm_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.h(<8 x i16> [[TMP0]], ptr [[_2:%.*]], i32 2, i32 1) +// CHECK-NEXT: ret void +// +void vstelm_h(v8i16 _1, void *_2) { return __lsx_vstelm_h(_1, _2, 2, 1); } +// CHECK-LABEL: @vstelm_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.w(<4 x i32> [[TMP0]], ptr [[_2:%.*]], i32 4, i32 1) +// CHECK-NEXT: ret void +// +void vstelm_w(v4i32 _1, void *_2) { return __lsx_vstelm_w(_1, _2, 4, 1); } +// CHECK-LABEL: @vstelm_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.d(<2 x i64> [[TMP0]], ptr [[_2:%.*]], i32 8, i32 1) +// CHECK-NEXT: ret void +// +void vstelm_d(v2i64 _1, void *_2) { return __lsx_vstelm_d(_1, _2, 8, 1); } +// CHECK-LABEL: @vaddwev_d_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vaddwev_d_w(v4i32 _1, v4i32 _2) { return __lsx_vaddwev_d_w(_1, _2); } +// CHECK-LABEL: @vaddwev_w_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vaddwev_w_h(v8i16 _1, v8i16 _2) { return __lsx_vaddwev_w_h(_1, _2); } +// CHECK-LABEL: @vaddwev_h_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vaddwev_h_b(v16i8 _1, v16i8 _2) { return __lsx_vaddwev_h_b(_1, _2); } +// CHECK-LABEL: @vaddwod_d_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vaddwod_d_w(v4i32 _1, v4i32 _2) { return __lsx_vaddwod_d_w(_1, _2); } +// CHECK-LABEL: @vaddwod_w_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: 
[[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vaddwod_w_h(v8i16 _1, v8i16 _2) { return __lsx_vaddwod_w_h(_1, _2); } +// CHECK-LABEL: @vaddwod_h_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vaddwod_h_b(v16i8 _1, v16i8 _2) { return __lsx_vaddwod_h_b(_1, _2); } +// CHECK-LABEL: @vaddwev_d_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vaddwev_d_wu(v4u32 _1, v4u32 _2) { return __lsx_vaddwev_d_wu(_1, _2); } +// CHECK-LABEL: @vaddwev_w_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vaddwev_w_hu(v8u16 _1, v8u16 _2) { return __lsx_vaddwev_w_hu(_1, _2); } +// CHECK-LABEL: @vaddwev_h_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vaddwev_h_bu(v16u8 _1, v16u8 _2) { return __lsx_vaddwev_h_bu(_1, _2); } +// CHECK-LABEL: @vaddwod_d_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vaddwod_d_wu(v4u32 _1, v4u32 _2) { return __lsx_vaddwod_d_wu(_1, _2); } +// CHECK-LABEL: @vaddwod_w_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vaddwod_w_hu(v8u16 _1, v8u16 _2) { return __lsx_vaddwod_w_hu(_1, _2); } +// CHECK-LABEL: @vaddwod_h_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu(<16 x i8> [[TMP0]], <16 x i8> 
[[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vaddwod_h_bu(v16u8 _1, v16u8 _2) { return __lsx_vaddwod_h_bu(_1, _2); } +// CHECK-LABEL: @vaddwev_d_wu_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vaddwev_d_wu_w(v4u32 _1, v4i32 _2) { + return __lsx_vaddwev_d_wu_w(_1, _2); +} +// CHECK-LABEL: @vaddwev_w_hu_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vaddwev_w_hu_h(v8u16 _1, v8i16 _2) { + return __lsx_vaddwev_w_hu_h(_1, _2); +} +// CHECK-LABEL: @vaddwev_h_bu_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vaddwev_h_bu_b(v16u8 _1, v16i8 _2) { + return __lsx_vaddwev_h_bu_b(_1, _2); +} +// CHECK-LABEL: @vaddwod_d_wu_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vaddwod_d_wu_w(v4u32 _1, v4i32 _2) { + return __lsx_vaddwod_d_wu_w(_1, _2); +} +// CHECK-LABEL: @vaddwod_w_hu_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vaddwod_w_hu_h(v8u16 _1, v8i16 _2) { + return __lsx_vaddwod_w_hu_h(_1, _2); +} +// CHECK-LABEL: @vaddwod_h_bu_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vaddwod_h_bu_b(v16u8 _1, v16i8 _2) { + return __lsx_vaddwod_h_bu_b(_1, _2); +} +// CHECK-LABEL: @vsubwev_d_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: 
[[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vsubwev_d_w(v4i32 _1, v4i32 _2) { return __lsx_vsubwev_d_w(_1, _2); } +// CHECK-LABEL: @vsubwev_w_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwev.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vsubwev_w_h(v8i16 _1, v8i16 _2) { return __lsx_vsubwev_w_h(_1, _2); } +// CHECK-LABEL: @vsubwev_h_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwev.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vsubwev_h_b(v16i8 _1, v16i8 _2) { return __lsx_vsubwev_h_b(_1, _2); } +// CHECK-LABEL: @vsubwod_d_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vsubwod_d_w(v4i32 _1, v4i32 _2) { return __lsx_vsubwod_d_w(_1, _2); } +// CHECK-LABEL: @vsubwod_w_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwod.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vsubwod_w_h(v8i16 _1, v8i16 _2) { return __lsx_vsubwod_w_h(_1, _2); } +// CHECK-LABEL: @vsubwod_h_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwod.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vsubwod_h_b(v16i8 _1, v16i8 _2) { return __lsx_vsubwod_h_b(_1, _2); } +// CHECK-LABEL: @vsubwev_d_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.d.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vsubwev_d_wu(v4u32 _1, v4u32 _2) { return __lsx_vsubwev_d_wu(_1, _2); } +// CHECK-LABEL: @vsubwev_w_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// 
CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwev.w.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vsubwev_w_hu(v8u16 _1, v8u16 _2) { return __lsx_vsubwev_w_hu(_1, _2); } +// CHECK-LABEL: @vsubwev_h_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwev.h.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vsubwev_h_bu(v16u8 _1, v16u8 _2) { return __lsx_vsubwev_h_bu(_1, _2); } +// CHECK-LABEL: @vsubwod_d_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.d.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vsubwod_d_wu(v4u32 _1, v4u32 _2) { return __lsx_vsubwod_d_wu(_1, _2); } +// CHECK-LABEL: @vsubwod_w_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwod.w.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vsubwod_w_hu(v8u16 _1, v8u16 _2) { return __lsx_vsubwod_w_hu(_1, _2); } +// CHECK-LABEL: @vsubwod_h_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwod.h.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vsubwod_h_bu(v16u8 _1, v16u8 _2) { return __lsx_vsubwod_h_bu(_1, _2); } +// CHECK-LABEL: @vaddwev_q_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vaddwev_q_d(v2i64 _1, v2i64 _2) { return __lsx_vaddwev_q_d(_1, _2); } +// CHECK-LABEL: @vaddwod_q_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vaddwod_q_d(v2i64 _1, v2i64 _2) { return __lsx_vaddwod_q_d(_1, _2); } +// CHECK-LABEL: @vaddwev_q_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 
[[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vaddwev_q_du(v2u64 _1, v2u64 _2) { return __lsx_vaddwev_q_du(_1, _2); } +// CHECK-LABEL: @vaddwod_q_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vaddwod_q_du(v2u64 _1, v2u64 _2) { return __lsx_vaddwod_q_du(_1, _2); } +// CHECK-LABEL: @vsubwev_q_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vsubwev_q_d(v2i64 _1, v2i64 _2) { return __lsx_vsubwev_q_d(_1, _2); } +// CHECK-LABEL: @vsubwod_q_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vsubwod_q_d(v2i64 _1, v2i64 _2) { return __lsx_vsubwod_q_d(_1, _2); } +// CHECK-LABEL: @vsubwev_q_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vsubwev_q_du(v2u64 _1, v2u64 _2) { return __lsx_vsubwev_q_du(_1, _2); } +// CHECK-LABEL: @vsubwod_q_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vsubwod_q_du(v2u64 _1, v2u64 _2) { return __lsx_vsubwod_q_du(_1, _2); } +// CHECK-LABEL: @vaddwev_q_du_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vaddwev_q_du_d(v2u64 _1, v2i64 _2) { + return __lsx_vaddwev_q_du_d(_1, _2); +} +// CHECK-LABEL: @vaddwod_q_du_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// 
CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vaddwod_q_du_d(v2u64 _1, v2i64 _2) { + return __lsx_vaddwod_q_du_d(_1, _2); +} +// CHECK-LABEL: @vmulwev_d_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vmulwev_d_w(v4i32 _1, v4i32 _2) { return __lsx_vmulwev_d_w(_1, _2); } +// CHECK-LABEL: @vmulwev_w_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vmulwev_w_h(v8i16 _1, v8i16 _2) { return __lsx_vmulwev_w_h(_1, _2); } +// CHECK-LABEL: @vmulwev_h_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vmulwev_h_b(v16i8 _1, v16i8 _2) { return __lsx_vmulwev_h_b(_1, _2); } +// CHECK-LABEL: @vmulwod_d_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vmulwod_d_w(v4i32 _1, v4i32 _2) { return __lsx_vmulwod_d_w(_1, _2); } +// CHECK-LABEL: @vmulwod_w_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vmulwod_w_h(v8i16 _1, v8i16 _2) { return __lsx_vmulwod_w_h(_1, _2); } +// CHECK-LABEL: @vmulwod_h_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vmulwod_h_b(v16i8 _1, v16i8 _2) { return __lsx_vmulwod_h_b(_1, _2); } +// CHECK-LABEL: @vmulwev_d_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to 
<4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vmulwev_d_wu(v4u32 _1, v4u32 _2) { return __lsx_vmulwev_d_wu(_1, _2); } +// CHECK-LABEL: @vmulwev_w_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vmulwev_w_hu(v8u16 _1, v8u16 _2) { return __lsx_vmulwev_w_hu(_1, _2); } +// CHECK-LABEL: @vmulwev_h_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vmulwev_h_bu(v16u8 _1, v16u8 _2) { return __lsx_vmulwev_h_bu(_1, _2); } +// CHECK-LABEL: @vmulwod_d_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vmulwod_d_wu(v4u32 _1, v4u32 _2) { return __lsx_vmulwod_d_wu(_1, _2); } +// CHECK-LABEL: @vmulwod_w_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vmulwod_w_hu(v8u16 _1, v8u16 _2) { return __lsx_vmulwod_w_hu(_1, _2); } +// CHECK-LABEL: @vmulwod_h_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vmulwod_h_bu(v16u8 _1, v16u8 _2) { return __lsx_vmulwod_h_bu(_1, _2); } +// CHECK-LABEL: @vmulwev_d_wu_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vmulwev_d_wu_w(v4u32 _1, v4i32 _2) { + return __lsx_vmulwev_d_wu_w(_1, _2); +} +// CHECK-LABEL: @vmulwev_w_hu_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: 
[[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vmulwev_w_hu_h(v8u16 _1, v8i16 _2) { + return __lsx_vmulwev_w_hu_h(_1, _2); +} +// CHECK-LABEL: @vmulwev_h_bu_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vmulwev_h_bu_b(v16u8 _1, v16i8 _2) { + return __lsx_vmulwev_h_bu_b(_1, _2); +} +// CHECK-LABEL: @vmulwod_d_wu_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vmulwod_d_wu_w(v4u32 _1, v4i32 _2) { + return __lsx_vmulwod_d_wu_w(_1, _2); +} +// CHECK-LABEL: @vmulwod_w_hu_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vmulwod_w_hu_h(v8u16 _1, v8i16 _2) { + return __lsx_vmulwod_w_hu_h(_1, _2); +} +// CHECK-LABEL: @vmulwod_h_bu_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vmulwod_h_bu_b(v16u8 _1, v16i8 _2) { + return __lsx_vmulwod_h_bu_b(_1, _2); +} +// CHECK-LABEL: @vmulwev_q_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vmulwev_q_d(v2i64 _1, v2i64 _2) { return __lsx_vmulwev_q_d(_1, _2); } +// CHECK-LABEL: @vmulwod_q_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vmulwod_q_d(v2i64 _1, v2i64 _2) { return __lsx_vmulwod_q_d(_1, 
_2); } +// CHECK-LABEL: @vmulwev_q_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vmulwev_q_du(v2u64 _1, v2u64 _2) { return __lsx_vmulwev_q_du(_1, _2); } +// CHECK-LABEL: @vmulwod_q_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vmulwod_q_du(v2u64 _1, v2u64 _2) { return __lsx_vmulwod_q_du(_1, _2); } +// CHECK-LABEL: @vmulwev_q_du_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vmulwev_q_du_d(v2u64 _1, v2i64 _2) { + return __lsx_vmulwev_q_du_d(_1, _2); +} +// CHECK-LABEL: @vmulwod_q_du_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vmulwod_q_du_d(v2u64 _1, v2i64 _2) { + return __lsx_vmulwod_q_du_d(_1, _2); +} +// CHECK-LABEL: @vhaddw_q_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vhaddw_q_d(v2i64 _1, v2i64 _2) { return __lsx_vhaddw_q_d(_1, _2); } +// CHECK-LABEL: @vhaddw_qu_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.qu.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2u64 vhaddw_qu_du(v2u64 _1, v2u64 _2) { return __lsx_vhaddw_qu_du(_1, _2); } +// CHECK-LABEL: @vhsubw_q_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vhsubw_q_d(v2i64 _1, 
v2i64 _2) { return __lsx_vhsubw_q_d(_1, _2); }
+// CHECK-LABEL: @vhsubw_qu_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.qu.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2u64 vhsubw_qu_du(v2u64 _1, v2u64 _2) { return __lsx_vhsubw_qu_du(_1, _2); }
+// CHECK-LABEL: @vmaddwev_d_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.w(<2 x i64> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
+// CHECK-NEXT: ret i128 [[TMP4]]
+//
+v2i64 vmaddwev_d_w(v2i64 _1, v4i32 _2, v4i32 _3) {
+  return __lsx_vmaddwev_d_w(_1, _2, _3);
+}
+// CHECK-LABEL: @vmaddwev_w_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.h(<4 x i32> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
+// CHECK-NEXT: ret i128 [[TMP4]]
+//
+v4i32 vmaddwev_w_h(v4i32 _1, v8i16 _2, v8i16 _3) {
+  return __lsx_vmaddwev_w_h(_1, _2, _3);
+}
+// CHECK-LABEL: @vmaddwev_h_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.b(<8 x i16> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
+// CHECK-NEXT: ret i128 [[TMP4]]
+//
+v8i16 vmaddwev_h_b(v8i16 _1, v16i8 _2, v16i8 _3) {
+  return __lsx_vmaddwev_h_b(_1, _2, _3);
+}
+// CHECK-LABEL: @vmaddwev_d_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu(<2 x i64> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
+// CHECK-NEXT: ret i128 [[TMP4]]
+//
+v2u64 vmaddwev_d_wu(v2u64 _1, v4u32 _2, v4u32 _3) {
+  return __lsx_vmaddwev_d_wu(_1, _2, _3);
+}
+// CHECK-LABEL: @vmaddwev_w_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu(<4 x i32> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
+// CHECK-NEXT: ret i128 [[TMP4]]
+//
+v4u32 vmaddwev_w_hu(v4u32 _1, v8u16 _2, v8u16 _3) {
+  return __lsx_vmaddwev_w_hu(_1, _2, _3);
+}
+// CHECK-LABEL: @vmaddwev_h_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu(<8 x i16> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
+// CHECK-NEXT: ret i128 [[TMP4]]
+//
+v8u16 vmaddwev_h_bu(v8u16 _1, v16u8 _2, v16u8 _3) {
+  return __lsx_vmaddwev_h_bu(_1, _2, _3);
+}
+// CHECK-LABEL: @vmaddwod_d_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.w(<2 x i64> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
+// CHECK-NEXT: ret i128 [[TMP4]]
+//
+v2i64 vmaddwod_d_w(v2i64 _1, v4i32 _2, v4i32 _3) {
+  return __lsx_vmaddwod_d_w(_1, _2, _3);
+}
+// CHECK-LABEL: @vmaddwod_w_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.h(<4 x i32> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
+// CHECK-NEXT: ret i128 [[TMP4]]
+//
+v4i32 vmaddwod_w_h(v4i32 _1, v8i16 _2, v8i16 _3) {
+  return __lsx_vmaddwod_w_h(_1, _2, _3);
+}
+// CHECK-LABEL: @vmaddwod_h_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.b(<8 x i16> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
+// CHECK-NEXT: ret i128 [[TMP4]]
+//
+v8i16 vmaddwod_h_b(v8i16 _1, v16i8 _2, v16i8 _3) {
+  return __lsx_vmaddwod_h_b(_1, _2, _3);
+}
+// CHECK-LABEL: @vmaddwod_d_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu(<2 x i64> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
+// CHECK-NEXT: ret i128 [[TMP4]]
+//
+v2u64 vmaddwod_d_wu(v2u64 _1, v4u32 _2, v4u32 _3) {
+  return __lsx_vmaddwod_d_wu(_1, _2, _3);
+}
+// CHECK-LABEL: @vmaddwod_w_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu(<4 x i32> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
+// CHECK-NEXT: ret i128 [[TMP4]]
+//
+v4u32 vmaddwod_w_hu(v4u32 _1, v8u16 _2, v8u16 _3) {
+  return __lsx_vmaddwod_w_hu(_1, _2, _3);
+}
+// CHECK-LABEL: @vmaddwod_h_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu(<8 x i16> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
+// CHECK-NEXT: ret i128 [[TMP4]]
+//
+v8u16 vmaddwod_h_bu(v8u16 _1, v16u8 _2, v16u8 _3) {
+  return __lsx_vmaddwod_h_bu(_1, _2, _3);
+}
+// CHECK-LABEL: @vmaddwev_d_wu_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu.w(<2 x i64> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
+// CHECK-NEXT: ret i128 [[TMP4]]
+//
+v2i64 vmaddwev_d_wu_w(v2i64 _1, v4u32 _2, v4i32 _3) {
+  return __lsx_vmaddwev_d_wu_w(_1, _2, _3);
+}
+// CHECK-LABEL: @vmaddwev_w_hu_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu.h(<4 x i32> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
+// CHECK-NEXT: ret i128 [[TMP4]]
+//
+v4i32 vmaddwev_w_hu_h(v4i32 _1, v8u16 _2, v8i16 _3) {
+  return __lsx_vmaddwev_w_hu_h(_1, _2, _3);
+}
+// CHECK-LABEL: @vmaddwev_h_bu_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu.b(<8 x i16> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
+// CHECK-NEXT: ret i128 [[TMP4]]
+//
+v8i16 vmaddwev_h_bu_b(v8i16 _1, v16u8 _2, v16i8 _3) {
+  return __lsx_vmaddwev_h_bu_b(_1, _2, _3);
+}
+// CHECK-LABEL: @vmaddwod_d_wu_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu.w(<2 x i64> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
+// CHECK-NEXT: ret i128 [[TMP4]]
+//
+v2i64 vmaddwod_d_wu_w(v2i64 _1, v4u32 _2, v4i32 _3) {
+  return __lsx_vmaddwod_d_wu_w(_1, _2, _3);
+}
+// CHECK-LABEL: @vmaddwod_w_hu_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu.h(<4 x i32> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
+// CHECK-NEXT: ret i128 [[TMP4]]
+//
+v4i32 vmaddwod_w_hu_h(v4i32 _1, v8u16 _2, v8i16 _3) {
+  return __lsx_vmaddwod_w_hu_h(_1, _2, _3);
+}
+// CHECK-LABEL: @vmaddwod_h_bu_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu.b(<8 x i16> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
+// CHECK-NEXT: ret i128 [[TMP4]]
+//
+v8i16 vmaddwod_h_bu_b(v8i16 _1, v16u8 _2, v16i8 _3) {
+  return __lsx_vmaddwod_h_bu_b(_1, _2, _3);
+}
+// CHECK-LABEL: @vmaddwev_q_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
+// CHECK-NEXT: ret i128 [[TMP4]]
+//
+v2i64 vmaddwev_q_d(v2i64 _1, v2i64 _2, v2i64 _3) {
+  return __lsx_vmaddwev_q_d(_1, _2, _3);
+}
+// CHECK-LABEL: @vmaddwod_q_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
+// CHECK-NEXT: ret i128 [[TMP4]]
+//
+v2i64 vmaddwod_q_d(v2i64 _1, v2i64 _2, v2i64 _3) {
+  return __lsx_vmaddwod_q_d(_1, _2, _3);
+}
+// CHECK-LABEL: @vmaddwev_q_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
+// CHECK-NEXT: ret i128 [[TMP4]]
+//
+v2u64 vmaddwev_q_du(v2u64 _1, v2u64 _2, v2u64 _3) {
+  return __lsx_vmaddwev_q_du(_1, _2, _3);
+}
+// CHECK-LABEL: @vmaddwod_q_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
+// CHECK-NEXT: ret i128 [[TMP4]]
+//
+v2u64 vmaddwod_q_du(v2u64 _1, v2u64 _2, v2u64 _3) {
+  return __lsx_vmaddwod_q_du(_1, _2, _3);
+}
+// CHECK-LABEL: @vmaddwev_q_du_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
+// CHECK-NEXT: ret i128 [[TMP4]]
+//
+v2i64 vmaddwev_q_du_d(v2i64 _1, v2u64 _2, v2i64 _3) {
+  return __lsx_vmaddwev_q_du_d(_1, _2, _3);
+}
+// CHECK-LABEL: @vmaddwod_q_du_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
+// CHECK-NEXT: ret i128 [[TMP4]]
+//
+v2i64 vmaddwod_q_du_d(v2i64 _1, v2u64 _2, v2i64 _3) {
+  return __lsx_vmaddwod_q_du_d(_1, _2, _3);
+}
+// CHECK-LABEL: @vrotr_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vrotr.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v16i8 vrotr_b(v16i8 _1, v16i8 _2) { return __lsx_vrotr_b(_1, _2); }
+// CHECK-LABEL: @vrotr_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vrotr.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8i16 vrotr_h(v8i16 _1, v8i16 _2) { return __lsx_vrotr_h(_1, _2); }
+// CHECK-LABEL: @vrotr_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vrotr.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vrotr_w(v4i32 _1, v4i32 _2) { return __lsx_vrotr_w(_1, _2); }
+// CHECK-LABEL: @vrotr_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vrotr.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vrotr_d(v2i64 _1, v2i64 _2) { return __lsx_vrotr_d(_1, _2); }
+// CHECK-LABEL: @vadd_q(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vadd.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vadd_q(v2i64 _1, v2i64 _2) { return __lsx_vadd_q(_1, _2); }
+// CHECK-LABEL: @vsub_q(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsub.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vsub_q(v2i64 _1, v2i64 _2) { return __lsx_vsub_q(_1, _2); }
+// CHECK-LABEL: @vldrepl_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vldrepl.b(ptr [[_1:%.*]], i32 1)
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to i128
+// CHECK-NEXT: ret i128 [[TMP1]]
+//
+v16i8 vldrepl_b(void *_1) { return __lsx_vldrepl_b(_1, 1); }
+// CHECK-LABEL: @vldrepl_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vldrepl.h(ptr [[_1:%.*]], i32 2)
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to i128
+// CHECK-NEXT: ret i128 [[TMP1]]
+//
+v8i16 vldrepl_h(void *_1) { return __lsx_vldrepl_h(_1, 2); }
+// CHECK-LABEL: @vldrepl_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vldrepl.w(ptr [[_1:%.*]], i32 4)
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to i128
+// CHECK-NEXT: ret i128 [[TMP1]]
+//
+v4i32 vldrepl_w(void *_1) { return __lsx_vldrepl_w(_1, 4); }
+// CHECK-LABEL: @vldrepl_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vldrepl.d(ptr [[_1:%.*]], i32 8)
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to i128
+// CHECK-NEXT: ret i128 [[TMP1]]
+//
+v2i64 vldrepl_d(void *_1) { return __lsx_vldrepl_d(_1, 8); }
+// CHECK-LABEL: @vmskgez_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmskgez.b(<16 x i8> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v16i8 vmskgez_b(v16i8 _1) { return __lsx_vmskgez_b(_1); }
+// CHECK-LABEL: @vmsknz_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmsknz.b(<16 x i8> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v16i8 vmsknz_b(v16i8 _1) { return __lsx_vmsknz_b(_1); }
+// CHECK-LABEL: @vexth_h_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vexth.h.b(<16 x i8> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v8i16 vexth_h_b(v16i8 _1) { return __lsx_vexth_h_b(_1); }
+// CHECK-LABEL: @vexth_w_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vexth.w.h(<8 x i16> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v4i32 vexth_w_h(v8i16 _1) { return __lsx_vexth_w_h(_1); }
+// CHECK-LABEL: @vexth_d_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.d.w(<4 x i32> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2i64 vexth_d_w(v4i32 _1) { return __lsx_vexth_d_w(_1); }
+// CHECK-LABEL: @vexth_q_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.q.d(<2 x i64> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2i64 vexth_q_d(v2i64 _1) { return __lsx_vexth_q_d(_1); }
+// CHECK-LABEL: @vexth_hu_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vexth.hu.bu(<16 x i8> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v8u16 vexth_hu_bu(v16u8 _1) { return __lsx_vexth_hu_bu(_1); }
+// CHECK-LABEL: @vexth_wu_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vexth.wu.hu(<8 x i16> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v4u32 vexth_wu_hu(v8u16 _1) { return __lsx_vexth_wu_hu(_1); }
+// CHECK-LABEL: @vexth_du_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.du.wu(<4 x i32> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2u64 vexth_du_wu(v4u32 _1) { return __lsx_vexth_du_wu(_1); }
+// CHECK-LABEL: @vexth_qu_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.qu.du(<2 x i64> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2u64 vexth_qu_du(v2u64 _1) { return __lsx_vexth_qu_du(_1); }
+// CHECK-LABEL: @vrotri_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vrotri.b(<16 x i8> [[TMP0]], i32 1)
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v16i8 vrotri_b(v16i8 _1) { return __lsx_vrotri_b(_1, 1); }
+// CHECK-LABEL: @vrotri_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vrotri.h(<8 x i16> [[TMP0]], i32 1)
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v8i16 vrotri_h(v8i16 _1) { return __lsx_vrotri_h(_1, 1); }
+// CHECK-LABEL: @vrotri_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vrotri.w(<4 x i32> [[TMP0]], i32 1)
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v4i32 vrotri_w(v4i32 _1) { return __lsx_vrotri_w(_1, 1); }
+// CHECK-LABEL: @vrotri_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vrotri.d(<2 x i64> [[TMP0]], i32 1)
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2i64 vrotri_d(v2i64 _1) { return __lsx_vrotri_d(_1, 1); }
+// CHECK-LABEL: @vextl_q_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vextl.q.d(<2 x i64> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2i64 vextl_q_d(v2i64 _1) { return __lsx_vextl_q_d(_1); }
+// CHECK-LABEL: @vsrlni_b_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlni.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v16i8 vsrlni_b_h(v16i8 _1, v16i8 _2) { return __lsx_vsrlni_b_h(_1, _2, 1); }
+// CHECK-LABEL: @vsrlni_h_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlni.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8i16 vsrlni_h_w(v8i16 _1, v8i16 _2) { return __lsx_vsrlni_h_w(_1, _2, 1); }
+// CHECK-LABEL: @vsrlni_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlni.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vsrlni_w_d(v4i32 _1, v4i32 _2) { return __lsx_vsrlni_w_d(_1, _2, 1); }
+// CHECK-LABEL: @vsrlni_d_q(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlni.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vsrlni_d_q(v2i64 _1, v2i64 _2) { return __lsx_vsrlni_d_q(_1, _2, 1); }
+// CHECK-LABEL: @vsrlrni_b_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlrni.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v16i8 vsrlrni_b_h(v16i8 _1, v16i8 _2) { return __lsx_vsrlrni_b_h(_1, _2, 1); }
+// CHECK-LABEL: @vsrlrni_h_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlrni.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8i16 vsrlrni_h_w(v8i16 _1, v8i16 _2) { return __lsx_vsrlrni_h_w(_1, _2, 1); }
+// CHECK-LABEL: @vsrlrni_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlrni.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vsrlrni_w_d(v4i32 _1, v4i32 _2) { return __lsx_vsrlrni_w_d(_1, _2, 1); }
+// CHECK-LABEL: @vsrlrni_d_q(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlrni.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vsrlrni_d_q(v2i64 _1, v2i64 _2) { return __lsx_vsrlrni_d_q(_1, _2, 1); }
+// CHECK-LABEL: @vssrlni_b_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlni.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v16i8 vssrlni_b_h(v16i8 _1, v16i8 _2) { return __lsx_vssrlni_b_h(_1, _2, 1); }
+// CHECK-LABEL: @vssrlni_h_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlni.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8i16 vssrlni_h_w(v8i16 _1, v8i16 _2) { return __lsx_vssrlni_h_w(_1, _2, 1); }
+// CHECK-LABEL: @vssrlni_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlni.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vssrlni_w_d(v4i32 _1, v4i32 _2) { return __lsx_vssrlni_w_d(_1, _2, 1); }
+// CHECK-LABEL: @vssrlni_d_q(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlni.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vssrlni_d_q(v2i64 _1, v2i64 _2) { return __lsx_vssrlni_d_q(_1, _2, 1); }
+// CHECK-LABEL: @vssrlni_bu_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlni.bu.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v16u8 vssrlni_bu_h(v16u8 _1, v16i8 _2) { return __lsx_vssrlni_bu_h(_1, _2, 1); }
+// CHECK-LABEL: @vssrlni_hu_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlni.hu.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8u16 vssrlni_hu_w(v8u16 _1, v8i16 _2) { return __lsx_vssrlni_hu_w(_1, _2, 1); }
+// CHECK-LABEL: @vssrlni_wu_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlni.wu.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4u32 vssrlni_wu_d(v4u32 _1, v4i32 _2) { return __lsx_vssrlni_wu_d(_1, _2, 1); }
+// CHECK-LABEL: @vssrlni_du_q(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlni.du.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2u64 vssrlni_du_q(v2u64 _1, v2i64 _2) { return __lsx_vssrlni_du_q(_1, _2, 1); }
+// CHECK-LABEL: @vssrlrni_b_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrni.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v16i8 vssrlrni_b_h(v16i8 _1, v16i8 _2) { return __lsx_vssrlrni_b_h(_1, _2, 1); }
+// CHECK-LABEL: @vssrlrni_h_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrni.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8i16 vssrlrni_h_w(v8i16 _1, v8i16 _2) { return __lsx_vssrlrni_h_w(_1, _2, 1); }
+// CHECK-LABEL: @vssrlrni_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrni.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vssrlrni_w_d(v4i32 _1, v4i32 _2) { return __lsx_vssrlrni_w_d(_1, _2, 1); }
+// CHECK-LABEL: @vssrlrni_d_q(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlrni.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vssrlrni_d_q(v2i64 _1, v2i64 _2) { return __lsx_vssrlrni_d_q(_1, _2, 1); }
+// CHECK-LABEL: @vssrlrni_bu_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrni.bu.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v16u8 vssrlrni_bu_h(v16u8 _1, v16i8 _2) {
+  return __lsx_vssrlrni_bu_h(_1, _2, 1);
+}
+// CHECK-LABEL: @vssrlrni_hu_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrni.hu.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8u16 vssrlrni_hu_w(v8u16 _1, v8i16 _2) {
+  return __lsx_vssrlrni_hu_w(_1, _2, 1);
+}
+// CHECK-LABEL: @vssrlrni_wu_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrni.wu.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4u32 vssrlrni_wu_d(v4u32 _1, v4i32 _2) {
+  return __lsx_vssrlrni_wu_d(_1, _2, 1);
+}
+// CHECK-LABEL: @vssrlrni_du_q(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlrni.du.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2u64 vssrlrni_du_q(v2u64 _1, v2i64 _2) {
+  return __lsx_vssrlrni_du_q(_1, _2, 1);
+}
+// CHECK-LABEL: @vsrani_b_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrani.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v16i8 vsrani_b_h(v16i8 _1, v16i8 _2) { return __lsx_vsrani_b_h(_1, _2, 1); }
+// CHECK-LABEL: @vsrani_h_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrani.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8i16 vsrani_h_w(v8i16 _1, v8i16 _2) { return __lsx_vsrani_h_w(_1, _2, 1); }
+// CHECK-LABEL: @vsrani_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrani.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vsrani_w_d(v4i32 _1, v4i32 _2) { return __lsx_vsrani_w_d(_1, _2, 1); }
+// CHECK-LABEL: @vsrani_d_q(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrani.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vsrani_d_q(v2i64 _1, v2i64 _2) { return __lsx_vsrani_d_q(_1, _2, 1); }
+// CHECK-LABEL: @vsrarni_b_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrarni.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v16i8 vsrarni_b_h(v16i8 _1, v16i8 _2) { return __lsx_vsrarni_b_h(_1, _2, 1); }
+// CHECK-LABEL: @vsrarni_h_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrarni.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8i16 vsrarni_h_w(v8i16 _1, v8i16 _2) { return __lsx_vsrarni_h_w(_1, _2, 1); }
+// CHECK-LABEL: @vsrarni_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrarni.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vsrarni_w_d(v4i32 _1, v4i32 _2) { return __lsx_vsrarni_w_d(_1, _2, 1); }
+// CHECK-LABEL: @vsrarni_d_q(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrarni.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vsrarni_d_q(v2i64 _1, v2i64 _2) { return __lsx_vsrarni_d_q(_1, _2, 1); }
+// CHECK-LABEL: @vssrani_b_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrani.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v16i8 vssrani_b_h(v16i8 _1, v16i8 _2) { return __lsx_vssrani_b_h(_1, _2, 1); }
+// CHECK-LABEL: @vssrani_h_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrani.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8i16 vssrani_h_w(v8i16 _1, v8i16 _2) { return __lsx_vssrani_h_w(_1, _2, 1); }
+// CHECK-LABEL: @vssrani_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrani.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vssrani_w_d(v4i32 _1, v4i32 _2) { return __lsx_vssrani_w_d(_1, _2, 1); }
+// CHECK-LABEL: @vssrani_d_q(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrani.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vssrani_d_q(v2i64 _1, v2i64 _2) { return __lsx_vssrani_d_q(_1, _2, 1); }
+// CHECK-LABEL: @vssrani_bu_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrani.bu.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v16u8 vssrani_bu_h(v16u8 _1, v16i8 _2) { return __lsx_vssrani_bu_h(_1, _2, 1); }
+// CHECK-LABEL: @vssrani_hu_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrani.hu.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8u16 vssrani_hu_w(v8u16 _1, v8i16 _2) { return __lsx_vssrani_hu_w(_1, _2, 1); }
+// CHECK-LABEL: @vssrani_wu_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrani.wu.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4u32 vssrani_wu_d(v4u32 _1, v4i32 _2) { return __lsx_vssrani_wu_d(_1, _2, 1); }
+// CHECK-LABEL: @vssrani_du_q(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrani.du.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2u64 vssrani_du_q(v2u64 _1, v2i64 _2) { return __lsx_vssrani_du_q(_1, _2, 1); }
+// CHECK-LABEL: @vssrarni_b_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarni.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v16i8 vssrarni_b_h(v16i8 _1, v16i8 _2) { return __lsx_vssrarni_b_h(_1, _2, 1); }
+// CHECK-LABEL: @vssrarni_h_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarni.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8i16 vssrarni_h_w(v8i16 _1, v8i16 _2) { return __lsx_vssrarni_h_w(_1, _2, 1); }
+// CHECK-LABEL: @vssrarni_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarni.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vssrarni_w_d(v4i32 _1, v4i32 _2) { return __lsx_vssrarni_w_d(_1, _2, 1); }
+// CHECK-LABEL: @vssrarni_d_q(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrarni.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vssrarni_d_q(v2i64 _1, v2i64 _2) { return __lsx_vssrarni_d_q(_1, _2, 1); }
+// CHECK-LABEL: @vssrarni_bu_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarni.bu.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v16u8 vssrarni_bu_h(v16u8 _1, v16i8 _2) {
+  return __lsx_vssrarni_bu_h(_1, _2, 1);
+}
+// CHECK-LABEL: @vssrarni_hu_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarni.hu.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8u16 vssrarni_hu_w(v8u16 _1, v8i16 _2) {
+  return __lsx_vssrarni_hu_w(_1, _2, 1);
+}
+// CHECK-LABEL: @vssrarni_wu_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarni.wu.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4u32 vssrarni_wu_d(v4u32 _1, v4i32 _2) {
+  return __lsx_vssrarni_wu_d(_1, _2, 1);
+}
+// CHECK-LABEL: @vssrarni_du_q(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrarni.du.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2u64 vssrarni_du_q(v2u64 _1, v2i64 _2) {
+  return __lsx_vssrarni_du_q(_1, _2, 1);
+}
+// CHECK-LABEL: @vpermi_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpermi.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vpermi_w(v4i32 _1, v4i32 _2) { return __lsx_vpermi_w(_1, _2, 1); }
+// CHECK-LABEL: @vld(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vld(ptr [[_1:%.*]], i32 1)
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to i128
+// CHECK-NEXT: ret i128 [[TMP1]]
+//
+v16i8 vld(void *_1) { return __lsx_vld(_1, 1); }
+// CHECK-LABEL: @vst(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vst(<16 x i8> [[TMP0]], ptr [[_2:%.*]], i32 1)
+// CHECK-NEXT: ret void
+//
+void vst(v16i8 _1, void *_2) { return __lsx_vst(_1, _2, 1); }
+// CHECK-LABEL: @vssrlrn_b_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrn.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v16i8 vssrlrn_b_h(v8i16 _1, v8i16 _2) { return __lsx_vssrlrn_b_h(_1, _2); }
+// CHECK-LABEL: @vssrlrn_h_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrn.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8i16 vssrlrn_h_w(v4i32 _1, v4i32 _2) { return __lsx_vssrlrn_h_w(_1, _2); }
+// CHECK-LABEL: @vssrlrn_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrn.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vssrlrn_w_d(v2i64 _1, v2i64 _2) { return __lsx_vssrlrn_w_d(_1, _2); }
+// CHECK-LABEL: @vssrln_b_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrln.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v16i8 vssrln_b_h(v8i16 _1, v8i16 _2) { return __lsx_vssrln_b_h(_1, _2); }
+// CHECK-LABEL: @vssrln_h_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrln.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8i16 vssrln_h_w(v4i32 _1, v4i32 _2) { return __lsx_vssrln_h_w(_1, _2); }
+// CHECK-LABEL: @vssrln_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrln.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vssrln_w_d(v2i64 _1, v2i64 _2) { return __lsx_vssrln_w_d(_1, _2); }
+// CHECK-LABEL: @vorn_v(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vorn.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v16i8 vorn_v(v16i8 _1, v16i8 _2) { return __lsx_vorn_v(_1, _2); }
+// CHECK-LABEL: @vldi(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vldi(i32 1)
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to i128
+// CHECK-NEXT: ret i128 [[TMP1]]
+//
+v2i64 vldi() { return __lsx_vldi(1); }
+// CHECK-LABEL: @vshuf_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vshuf.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to i128
+// CHECK-NEXT: ret i128 [[TMP4]]
+//
+v16i8 vshuf_b(v16i8 _1, v16i8 _2, v16i8 _3) {
+  return __lsx_vshuf_b(_1, _2, _3);
+}
+// CHECK-LABEL: @vldx(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vldx(ptr [[_1:%.*]], i64 1)
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to i128
+// CHECK-NEXT: ret i128 [[TMP1]]
+//
+v16i8 vldx(void *_1) { return __lsx_vldx(_1, 1); }
+// CHECK-LABEL: @vstx(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstx(<16 x i8> [[TMP0]], ptr [[_2:%.*]], i64 1)
+// CHECK-NEXT: ret void
+//
+void vstx(v16i8 _1, void *_2) { return __lsx_vstx(_1, _2, 1); }
+// CHECK-LABEL: @vextl_qu_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vextl.qu.du(<2 x i64> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2u64 vextl_qu_du(v2u64 _1) { return __lsx_vextl_qu_du(_1); }
+// CHECK-LABEL: @bnz_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.b(<16 x i8> [[TMP0]])
+// CHECK-NEXT: ret i32 [[TMP1]]
+//
+int bnz_b(v16u8 _1) { return __lsx_bnz_b(_1); }
+// CHECK-LABEL: @bnz_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.d(<2 x i64> [[TMP0]])
+// CHECK-NEXT: ret i32 [[TMP1]]
+//
+int bnz_d(v2u64 _1) { return __lsx_bnz_d(_1); }
+// CHECK-LABEL: @bnz_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.h(<8 x i16> [[TMP0]])
+// CHECK-NEXT: ret i32 [[TMP1]]
+//
+int bnz_h(v8u16 _1) { return __lsx_bnz_h(_1); }
+// CHECK-LABEL: @bnz_v(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.v(<16 x i8> [[TMP0]])
+// CHECK-NEXT: ret i32 [[TMP1]]
+//
+int bnz_v(v16u8 _1) { return __lsx_bnz_v(_1); }
+// CHECK-LABEL: @bnz_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.w(<4 x i32> [[TMP0]])
+// CHECK-NEXT: ret i32 [[TMP1]]
+//
+int bnz_w(v4u32 _1) { return __lsx_bnz_w(_1); }
+// CHECK-LABEL: @bz_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.b(<16 x i8> [[TMP0]])
+// CHECK-NEXT: ret i32 [[TMP1]]
+//
+int bz_b(v16u8 _1) { return __lsx_bz_b(_1); }
+// CHECK-LABEL: @bz_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.d(<2 x i64> [[TMP0]])
+// CHECK-NEXT: ret i32 [[TMP1]]
+//
+int bz_d(v2u64 _1) { return __lsx_bz_d(_1); }
+// CHECK-LABEL: @bz_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.h(<8 x i16> [[TMP0]])
+// CHECK-NEXT: ret i32 [[TMP1]]
+//
+int bz_h(v8u16 _1) { return __lsx_bz_h(_1); }
+// CHECK-LABEL: @bz_v(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.v(<16 x i8> [[TMP0]])
+// CHECK-NEXT: ret i32 [[TMP1]]
+//
+int bz_v(v16u8 _1) { return __lsx_bz_v(_1); }
+// CHECK-LABEL: @bz_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.w(<4 x i32> [[TMP0]])
+// CHECK-NEXT: ret i32 [[TMP1]]
+//
+int bz_w(v4u32 _1) { return __lsx_bz_w(_1); }
+// CHECK-LABEL: @vfcmp_caf_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.caf.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vfcmp_caf_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_caf_d(_1, _2); }
+// CHECK-LABEL: @vfcmp_caf_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.caf.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vfcmp_caf_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_caf_s(_1, _2); }
+// CHECK-LABEL: @vfcmp_ceq_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.ceq.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vfcmp_ceq_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_ceq_d(_1, _2); }
+// CHECK-LABEL: @vfcmp_ceq_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.ceq.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vfcmp_ceq_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_ceq_s(_1, _2); }
+// CHECK-LABEL: @vfcmp_cle_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cle.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vfcmp_cle_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cle_d(_1, _2); }
+// CHECK-LABEL: @vfcmp_cle_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cle.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vfcmp_cle_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cle_s(_1, _2); }
+// CHECK-LABEL: @vfcmp_clt_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.clt.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vfcmp_clt_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_clt_d(_1, _2); }
+// CHECK-LABEL: @vfcmp_clt_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.clt.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vfcmp_clt_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_clt_s(_1, _2); }
+// CHECK-LABEL: @vfcmp_cne_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cne.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vfcmp_cne_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cne_d(_1, _2); }
+// CHECK-LABEL: @vfcmp_cne_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cne.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vfcmp_cne_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cne_s(_1, _2); }
+// CHECK-LABEL: @vfcmp_cor_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cor.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vfcmp_cor_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cor_d(_1, _2); }
+// CHECK-LABEL: @vfcmp_cor_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cor.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vfcmp_cor_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cor_s(_1, _2); }
+// CHECK-LABEL: @vfcmp_cueq_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cueq.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vfcmp_cueq_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cueq_d(_1, _2); }
+// CHECK-LABEL: @vfcmp_cueq_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cueq.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vfcmp_cueq_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cueq_s(_1, _2); }
+// CHECK-LABEL: @vfcmp_cule_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cule.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vfcmp_cule_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cule_d(_1, _2); }
+// CHECK-LABEL: @vfcmp_cule_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cule.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vfcmp_cule_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cule_s(_1, _2); }
+// CHECK-LABEL: @vfcmp_cult_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cult.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vfcmp_cult_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cult_d(_1, _2); }
+// CHECK-LABEL: @vfcmp_cult_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cult.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vfcmp_cult_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cult_s(_1, _2); }
+// CHECK-LABEL: @vfcmp_cun_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cun.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vfcmp_cun_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cun_d(_1, _2); }
+// CHECK-LABEL: @vfcmp_cune_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cune.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vfcmp_cune_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cune_d(_1, _2); }
+// CHECK-LABEL: @vfcmp_cune_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cune.s(<4 x float>
[[TMP0]], <4 x float> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vfcmp_cune_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cune_s(_1, _2); } +// CHECK-LABEL: @vfcmp_cun_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cun.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vfcmp_cun_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cun_s(_1, _2); } +// CHECK-LABEL: @vfcmp_saf_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.saf.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vfcmp_saf_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_saf_d(_1, _2); } +// CHECK-LABEL: @vfcmp_saf_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.saf.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vfcmp_saf_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_saf_s(_1, _2); } +// CHECK-LABEL: @vfcmp_seq_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.seq.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vfcmp_seq_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_seq_d(_1, _2); } +// CHECK-LABEL: @vfcmp_seq_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.seq.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vfcmp_seq_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_seq_s(_1, _2); } +// CHECK-LABEL: @vfcmp_sle_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sle.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vfcmp_sle_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sle_d(_1, _2); } +// CHECK-LABEL: @vfcmp_sle_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: 
[[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sle.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vfcmp_sle_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sle_s(_1, _2); } +// CHECK-LABEL: @vfcmp_slt_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.slt.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vfcmp_slt_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_slt_d(_1, _2); } +// CHECK-LABEL: @vfcmp_slt_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.slt.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vfcmp_slt_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_slt_s(_1, _2); } +// CHECK-LABEL: @vfcmp_sne_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sne.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vfcmp_sne_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sne_d(_1, _2); } +// CHECK-LABEL: @vfcmp_sne_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sne.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vfcmp_sne_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sne_s(_1, _2); } +// CHECK-LABEL: @vfcmp_sor_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sor.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vfcmp_sor_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sor_d(_1, _2); } +// CHECK-LABEL: @vfcmp_sor_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sor.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vfcmp_sor_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sor_s(_1, _2); } +// CHECK-LABEL: @vfcmp_sueq_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: 
[[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sueq.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vfcmp_sueq_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sueq_d(_1, _2); } +// CHECK-LABEL: @vfcmp_sueq_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sueq.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vfcmp_sueq_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sueq_s(_1, _2); } +// CHECK-LABEL: @vfcmp_sule_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sule.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vfcmp_sule_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sule_d(_1, _2); } +// CHECK-LABEL: @vfcmp_sule_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sule.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vfcmp_sule_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sule_s(_1, _2); } +// CHECK-LABEL: @vfcmp_sult_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sult.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vfcmp_sult_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sult_d(_1, _2); } +// CHECK-LABEL: @vfcmp_sult_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sult.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vfcmp_sult_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sult_s(_1, _2); } +// CHECK-LABEL: @vfcmp_sun_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sun.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vfcmp_sun_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sun_d(_1, _2); } +// CHECK-LABEL: @vfcmp_sune_d( +// CHECK-NEXT: 
entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sune.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vfcmp_sune_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sune_d(_1, _2); } +// CHECK-LABEL: @vfcmp_sune_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sune.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vfcmp_sune_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sune_s(_1, _2); } +// CHECK-LABEL: @vfcmp_sun_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sun.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vfcmp_sun_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sun_s(_1, _2); } +// CHECK-LABEL: @vrepli_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vrepli.b(i32 1) +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to i128 +// CHECK-NEXT: ret i128 [[TMP1]] +// +v16i8 vrepli_b() { return __lsx_vrepli_b(1); } +// CHECK-LABEL: @vrepli_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vrepli.d(i32 1) +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to i128 +// CHECK-NEXT: ret i128 [[TMP1]] +// +v2i64 vrepli_d() { return __lsx_vrepli_d(1); } +// CHECK-LABEL: @vrepli_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vrepli.h(i32 1) +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to i128 +// CHECK-NEXT: ret i128 [[TMP1]] +// +v8i16 vrepli_h() { return __lsx_vrepli_h(1); } +// CHECK-LABEL: @vrepli_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vrepli.w(i32 1) +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to i128 +// CHECK-NEXT: ret i128 [[TMP1]] +// +v4i32 vrepli_w() { return __lsx_vrepli_w(1); } diff --git a/clang/test/CodeGen/LoongArch/lsx/builtin-error.c b/clang/test/CodeGen/LoongArch/lsx/builtin-error.c new file mode 100644 index 0000000000000000000000000000000000000000..3fc5f73f11934e6a7aaf806d0d3a6f0c89853fcd --- /dev/null +++ b/clang/test/CodeGen/LoongArch/lsx/builtin-error.c @@ -0,0 +1,1382 @@ +// RUN: %clang_cc1 -triple loongarch64 -target-feature +lsx -verify %s + +typedef signed char v16i8 __attribute__((vector_size(16), aligned(16))); +typedef signed char v16i8_b __attribute__((vector_size(16), aligned(1))); +typedef unsigned char v16u8 __attribute__((vector_size(16), aligned(16))); +typedef unsigned char v16u8_b __attribute__((vector_size(16), aligned(1))); +typedef short v8i16 __attribute__((vector_size(16), aligned(16))); +typedef short v8i16_h __attribute__((vector_size(16), aligned(2))); +typedef unsigned short v8u16 
__attribute__((vector_size(16), aligned(16))); +typedef unsigned short v8u16_h __attribute__((vector_size(16), aligned(2))); +typedef int v4i32 __attribute__((vector_size(16), aligned(16))); +typedef int v4i32_w __attribute__((vector_size(16), aligned(4))); +typedef unsigned int v4u32 __attribute__((vector_size(16), aligned(16))); +typedef unsigned int v4u32_w __attribute__((vector_size(16), aligned(4))); +typedef long long v2i64 __attribute__((vector_size(16), aligned(16))); +typedef long long v2i64_d __attribute__((vector_size(16), aligned(8))); +typedef unsigned long long v2u64 __attribute__((vector_size(16), aligned(16))); +typedef unsigned long long v2u64_d __attribute__((vector_size(16), aligned(8))); +typedef float v4f32 __attribute__((vector_size(16), aligned(16))); +typedef float v4f32_w __attribute__((vector_size(16), aligned(4))); +typedef double v2f64 __attribute__((vector_size(16), aligned(16))); +typedef double v2f64_d __attribute__((vector_size(16), aligned(8))); + +typedef long long __m128i __attribute__((__vector_size__(16), __may_alias__)); +typedef float __m128 __attribute__((__vector_size__(16), __may_alias__)); +typedef double __m128d __attribute__((__vector_size__(16), __may_alias__)); + +v16i8 vslli_b(v16i8 _1, int var) { + v16i8 res = __builtin_lsx_vslli_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __builtin_lsx_vslli_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lsx_vslli_b(_1, var); // expected-error {{argument to '__builtin_lsx_vslli_b' must be a constant integer}} + return res; +} + +v8i16 vslli_h(v8i16 _1, int var) { + v8i16 res = __builtin_lsx_vslli_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vslli_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vslli_h(_1, var); // expected-error {{argument to '__builtin_lsx_vslli_h' must be a constant integer}} + return res; +} + +v4i32 vslli_w(v4i32 _1, int var) { + v4i32 res = __builtin_lsx_vslli_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vslli_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vslli_w(_1, var); // expected-error {{argument to '__builtin_lsx_vslli_w' must be a constant integer}} + return res; +} + +v2i64 vslli_d(v2i64 _1, int var) { + v2i64 res = __builtin_lsx_vslli_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vslli_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vslli_d(_1, var); // expected-error {{argument to '__builtin_lsx_vslli_d' must be a constant integer}} + return res; +} + +v16i8 vsrai_b(v16i8 _1, int var) { + v16i8 res = __builtin_lsx_vsrai_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __builtin_lsx_vsrai_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lsx_vsrai_b(_1, var); // expected-error {{argument to '__builtin_lsx_vsrai_b' must be a constant integer}} + return res; +} + +v8i16 vsrai_h(v8i16 _1, int var) { + v8i16 res = __builtin_lsx_vsrai_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vsrai_h(_1, 16); // 
expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vsrai_h(_1, var); // expected-error {{argument to '__builtin_lsx_vsrai_h' must be a constant integer}} + return res; +} + +v4i32 vsrai_w(v4i32 _1, int var) { + v4i32 res = __builtin_lsx_vsrai_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vsrai_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vsrai_w(_1, var); // expected-error {{argument to '__builtin_lsx_vsrai_w' must be a constant integer}} + return res; +} + +v2i64 vsrai_d(v2i64 _1, int var) { + v2i64 res = __builtin_lsx_vsrai_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vsrai_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vsrai_d(_1, var); // expected-error {{argument to '__builtin_lsx_vsrai_d' must be a constant integer}} + return res; +} + +v16i8 vsrari_b(v16i8 _1, int var) { + v16i8 res = __builtin_lsx_vsrari_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __builtin_lsx_vsrari_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lsx_vsrari_b(_1, var); // expected-error {{argument to '__builtin_lsx_vsrari_b' must be a constant integer}} + return res; +} + +v8i16 vsrari_h(v8i16 _1, int var) { + v8i16 res = __builtin_lsx_vsrari_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vsrari_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vsrari_h(_1, var); // expected-error {{argument to '__builtin_lsx_vsrari_h' must be a constant integer}} + return res; +} + +v4i32 vsrari_w(v4i32 _1, int var) { + v4i32 res = __builtin_lsx_vsrari_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vsrari_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vsrari_w(_1, var); // expected-error {{argument to '__builtin_lsx_vsrari_w' must be a constant integer}} + return res; +} + +v2i64 vsrari_d(v2i64 _1, int var) { + v2i64 res = __builtin_lsx_vsrari_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vsrari_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vsrari_d(_1, var); // expected-error {{argument to '__builtin_lsx_vsrari_d' must be a constant integer}} + return res; +} + +v16i8 vsrli_b(v16i8 _1, int var) { + v16i8 res = __builtin_lsx_vsrli_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __builtin_lsx_vsrli_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lsx_vsrli_b(_1, var); // expected-error {{argument to '__builtin_lsx_vsrli_b' must be a constant integer}} + return res; +} + +v8i16 vsrli_h(v8i16 _1, int var) { + v8i16 res = __builtin_lsx_vsrli_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vsrli_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vsrli_h(_1, var); // expected-error {{argument to 
'__builtin_lsx_vsrli_h' must be a constant integer}} + return res; +} + +v4i32 vsrli_w(v4i32 _1, int var) { + v4i32 res = __builtin_lsx_vsrli_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vsrli_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vsrli_w(_1, var); // expected-error {{argument to '__builtin_lsx_vsrli_w' must be a constant integer}} + return res; +} + +v2i64 vsrli_d(v2i64 _1, int var) { + v2i64 res = __builtin_lsx_vsrli_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vsrli_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vsrli_d(_1, var); // expected-error {{argument to '__builtin_lsx_vsrli_d' must be a constant integer}} + return res; +} + +v16i8 vsrlri_b(v16i8 _1, int var) { + v16i8 res = __builtin_lsx_vsrlri_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __builtin_lsx_vsrlri_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lsx_vsrlri_b(_1, var); // expected-error {{argument to '__builtin_lsx_vsrlri_b' must be a constant integer}} + return res; +} + +v8i16 vsrlri_h(v8i16 _1, int var) { + v8i16 res = __builtin_lsx_vsrlri_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vsrlri_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vsrlri_h(_1, var); // expected-error {{argument to '__builtin_lsx_vsrlri_h' must be a constant integer}} + return res; +} + +v4i32 vsrlri_w(v4i32 _1, int var) { + v4i32 res = __builtin_lsx_vsrlri_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vsrlri_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vsrlri_w(_1, var); // expected-error {{argument to '__builtin_lsx_vsrlri_w' must be a constant integer}} + return res; +} + +v2i64 vsrlri_d(v2i64 _1, int var) { + v2i64 res = __builtin_lsx_vsrlri_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vsrlri_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vsrlri_d(_1, var); // expected-error {{argument to '__builtin_lsx_vsrlri_d' must be a constant integer}} + return res; +} + +v16u8 vbitclri_b(v16u8 _1, int var) { + v16u8 res = __builtin_lsx_vbitclri_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __builtin_lsx_vbitclri_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lsx_vbitclri_b(_1, var); // expected-error {{argument to '__builtin_lsx_vbitclri_b' must be a constant integer}} + return res; +} + +v8u16 vbitclri_h(v8u16 _1, int var) { + v8u16 res = __builtin_lsx_vbitclri_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vbitclri_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vbitclri_h(_1, var); // expected-error {{argument to '__builtin_lsx_vbitclri_h' must be a constant integer}} + return res; +} + +v4u32 vbitclri_w(v4u32 _1, int var) { + v4u32 
res = __builtin_lsx_vbitclri_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vbitclri_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vbitclri_w(_1, var); // expected-error {{argument to '__builtin_lsx_vbitclri_w' must be a constant integer}} + return res; +} + +v2u64 vbitclri_d(v2u64 _1, int var) { + v2u64 res = __builtin_lsx_vbitclri_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vbitclri_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vbitclri_d(_1, var); // expected-error {{argument to '__builtin_lsx_vbitclri_d' must be a constant integer}} + return res; +} + +v16u8 vbitseti_b(v16u8 _1, int var) { + v16u8 res = __builtin_lsx_vbitseti_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __builtin_lsx_vbitseti_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lsx_vbitseti_b(_1, var); // expected-error {{argument to '__builtin_lsx_vbitseti_b' must be a constant integer}} + return res; +} + +v8u16 vbitseti_h(v8u16 _1, int var) { + v8u16 res = __builtin_lsx_vbitseti_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vbitseti_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vbitseti_h(_1, var); // expected-error {{argument to '__builtin_lsx_vbitseti_h' must be a constant integer}} + return res; +} + +v4u32 vbitseti_w(v4u32 _1, int var) { + v4u32 res = __builtin_lsx_vbitseti_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vbitseti_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vbitseti_w(_1, var); // expected-error {{argument to '__builtin_lsx_vbitseti_w' must be a constant integer}} + return res; +} + +v2u64 vbitseti_d(v2u64 _1, int var) { + v2u64 res = __builtin_lsx_vbitseti_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vbitseti_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vbitseti_d(_1, var); // expected-error {{argument to '__builtin_lsx_vbitseti_d' must be a constant integer}} + return res; +} + +v16u8 vbitrevi_b(v16u8 _1, int var) { + v16u8 res = __builtin_lsx_vbitrevi_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __builtin_lsx_vbitrevi_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lsx_vbitrevi_b(_1, var); // expected-error {{argument to '__builtin_lsx_vbitrevi_b' must be a constant integer}} + return res; +} + +v8u16 vbitrevi_h(v8u16 _1, int var) { + v8u16 res = __builtin_lsx_vbitrevi_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vbitrevi_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vbitrevi_h(_1, var); // expected-error {{argument to '__builtin_lsx_vbitrevi_h' must be a constant integer}} + return res; +} + +v4u32 vbitrevi_w(v4u32 _1, int var) { + v4u32 res = __builtin_lsx_vbitrevi_w(_1, -1); // 
expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vbitrevi_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vbitrevi_w(_1, var); // expected-error {{argument to '__builtin_lsx_vbitrevi_w' must be a constant integer}} + return res; +} + +v2u64 vbitrevi_d(v2u64 _1, int var) { + v2u64 res = __builtin_lsx_vbitrevi_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vbitrevi_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vbitrevi_d(_1, var); // expected-error {{argument to '__builtin_lsx_vbitrevi_d' must be a constant integer}} + return res; +} + +v16i8 vaddi_bu(v16i8 _1, int var) { + v16i8 res = __builtin_lsx_vaddi_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vaddi_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vaddi_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vaddi_bu' must be a constant integer}} + return res; +} + +v8i16 vaddi_hu(v8i16 _1, int var) { + v8i16 res = __builtin_lsx_vaddi_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vaddi_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vaddi_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vaddi_hu' must be a constant integer}} + return res; +} + +v4i32 vaddi_wu(v4i32 _1, int var) { + v4i32 res = __builtin_lsx_vaddi_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vaddi_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vaddi_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vaddi_wu' must be a constant integer}} + return res; +} + +v2i64 vaddi_du(v2i64 _1, int var) { + v2i64 res = __builtin_lsx_vaddi_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vaddi_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vaddi_du(_1, var); // expected-error {{argument to '__builtin_lsx_vaddi_du' must be a constant integer}} + return res; +} + +v16i8 vsubi_bu(v16i8 _1, int var) { + v16i8 res = __builtin_lsx_vsubi_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vsubi_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vsubi_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vsubi_bu' must be a constant integer}} + return res; +} + +v8i16 vsubi_hu(v8i16 _1, int var) { + v8i16 res = __builtin_lsx_vsubi_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vsubi_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vsubi_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vsubi_hu' must be a constant integer}} + return res; +} + +v4i32 vsubi_wu(v4i32 _1, int var) { + v4i32 res = __builtin_lsx_vsubi_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= 
__builtin_lsx_vsubi_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vsubi_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vsubi_wu' must be a constant integer}} + return res; +} + +v2i64 vsubi_du(v2i64 _1, int var) { + v2i64 res = __builtin_lsx_vsubi_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vsubi_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vsubi_du(_1, var); // expected-error {{argument to '__builtin_lsx_vsubi_du' must be a constant integer}} + return res; +} + +v16i8 vmaxi_b(v16i8 _1, int var) { + v16i8 res = __builtin_lsx_vmaxi_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vmaxi_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vmaxi_b(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_b' must be a constant integer}} + return res; +} + +v8i16 vmaxi_h(v8i16 _1, int var) { + v8i16 res = __builtin_lsx_vmaxi_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vmaxi_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vmaxi_h(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_h' must be a constant integer}} + return res; +} + +v4i32 vmaxi_w(v4i32 _1, int var) { + v4i32 res = __builtin_lsx_vmaxi_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vmaxi_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vmaxi_w(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_w' must be a constant integer}} + return res; +} + +v2i64 vmaxi_d(v2i64 _1, int var) { + v2i64 res = __builtin_lsx_vmaxi_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vmaxi_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vmaxi_d(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_d' must be a constant integer}} + return res; +} + +v16u8 vmaxi_bu(v16u8 _1, int var) { + v16u8 res = __builtin_lsx_vmaxi_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vmaxi_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vmaxi_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_bu' must be a constant integer}} + return res; +} + +v8u16 vmaxi_hu(v8u16 _1, int var) { + v8u16 res = __builtin_lsx_vmaxi_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vmaxi_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vmaxi_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_hu' must be a constant integer}} + return res; +} + +v4u32 vmaxi_wu(v4u32 _1, int var) { + v4u32 res = __builtin_lsx_vmaxi_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vmaxi_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vmaxi_wu(_1, var); 
// expected-error {{argument to '__builtin_lsx_vmaxi_wu' must be a constant integer}} + return res; +} + +v2u64 vmaxi_du(v2u64 _1, int var) { + v2u64 res = __builtin_lsx_vmaxi_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vmaxi_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vmaxi_du(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_du' must be a constant integer}} + return res; +} + +v16i8 vmini_b(v16i8 _1, int var) { + v16i8 res = __builtin_lsx_vmini_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vmini_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vmini_b(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_b' must be a constant integer}} + return res; +} + +v8i16 vmini_h(v8i16 _1, int var) { + v8i16 res = __builtin_lsx_vmini_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vmini_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vmini_h(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_h' must be a constant integer}} + return res; +} + +v4i32 vmini_w(v4i32 _1, int var) { + v4i32 res = __builtin_lsx_vmini_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vmini_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vmini_w(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_w' must be a constant integer}} + return res; +} + +v2i64 vmini_d(v2i64 _1, int var) { + v2i64 res = __builtin_lsx_vmini_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vmini_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vmini_d(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_d' must be a constant integer}} + return res; +} + +v16u8 vmini_bu(v16u8 _1, int var) { + v16u8 res = __builtin_lsx_vmini_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vmini_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vmini_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_bu' must be a constant integer}} + return res; +} + +v8u16 vmini_hu(v8u16 _1, int var) { + v8u16 res = __builtin_lsx_vmini_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vmini_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vmini_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_hu' must be a constant integer}} + return res; +} + +v4u32 vmini_wu(v4u32 _1, int var) { + v4u32 res = __builtin_lsx_vmini_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vmini_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vmini_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_wu' must be a constant integer}} + return res; +} + +v2u64 vmini_du(v2u64 _1, int var) { + v2u64
res = __builtin_lsx_vmini_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vmini_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vmini_du(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_du' must be a constant integer}} + return res; +} + +v16i8 vseqi_b(v16i8 _1, int var) { + v16i8 res = __builtin_lsx_vseqi_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vseqi_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vseqi_b(_1, var); // expected-error {{argument to '__builtin_lsx_vseqi_b' must be a constant integer}} + return res; +} + +v8i16 vseqi_h(v8i16 _1, int var) { + v8i16 res = __builtin_lsx_vseqi_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vseqi_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vseqi_h(_1, var); // expected-error {{argument to '__builtin_lsx_vseqi_h' must be a constant integer}} + return res; +} + +v4i32 vseqi_w(v4i32 _1, int var) { + v4i32 res = __builtin_lsx_vseqi_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vseqi_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vseqi_w(_1, var); // expected-error {{argument to '__builtin_lsx_vseqi_w' must be a constant integer}} + return res; +} + +v2i64 vseqi_d(v2i64 _1, int var) { + v2i64 res = __builtin_lsx_vseqi_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vseqi_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vseqi_d(_1, var); // expected-error {{argument to '__builtin_lsx_vseqi_d' must be a constant integer}} + return res; +} + +v16i8 vslti_b(v16i8 _1, int var) { + v16i8 res = __builtin_lsx_vslti_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vslti_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vslti_b(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_b' must be a constant integer}} + return res; +} + +v8i16 vslti_h(v8i16 _1, int var) { + v8i16 res = __builtin_lsx_vslti_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vslti_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vslti_h(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_h' must be a constant integer}} + return res; +} + +v4i32 vslti_w(v4i32 _1, int var) { + v4i32 res = __builtin_lsx_vslti_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vslti_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vslti_w(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_w' must be a constant integer}} + return res; +} + +v2i64 vslti_d(v2i64 _1, int var) { + v2i64 res = __builtin_lsx_vslti_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vslti_d(_1, 16); // 
expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vslti_d(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_d' must be a constant integer}} + return res; +} + +v16i8 vslti_bu(v16u8 _1, int var) { + v16i8 res = __builtin_lsx_vslti_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vslti_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vslti_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_bu' must be a constant integer}} + return res; +} + +v8i16 vslti_hu(v8u16 _1, int var) { + v8i16 res = __builtin_lsx_vslti_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vslti_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vslti_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_hu' must be a constant integer}} + return res; +} + +v4i32 vslti_wu(v4u32 _1, int var) { + v4i32 res = __builtin_lsx_vslti_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vslti_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vslti_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_wu' must be a constant integer}} + return res; +} + +v2i64 vslti_du(v2u64 _1, int var) { + v2i64 res = __builtin_lsx_vslti_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vslti_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vslti_du(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_du' must be a constant integer}} + return res; +} + +v16i8 vslei_b(v16i8 _1, int var) { + v16i8 res = __builtin_lsx_vslei_b(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vslei_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vslei_b(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_b' must be a constant integer}} + return res; +} + +v8i16 vslei_h(v8i16 _1, int var) { + v8i16 res = __builtin_lsx_vslei_h(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vslei_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vslei_h(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_h' must be a constant integer}} + return res; +} + +v4i32 vslei_w(v4i32 _1, int var) { + v4i32 res = __builtin_lsx_vslei_w(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vslei_w(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vslei_w(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_w' must be a constant integer}} + return res; +} + +v2i64 vslei_d(v2i64 _1, int var) { + v2i64 res = __builtin_lsx_vslei_d(_1, -17); // expected-error {{argument value -17 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vslei_d(_1, 16); // expected-error {{argument value 16 is outside the valid range [-16, 15]}} + res |= __builtin_lsx_vslei_d(_1, var); // expected-error {{argument to 
'__builtin_lsx_vslei_d' must be a constant integer}} + return res; +} + +v16i8 vslei_bu(v16u8 _1, int var) { + v16i8 res = __builtin_lsx_vslei_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vslei_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vslei_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_bu' must be a constant integer}} + return res; +} + +v8i16 vslei_hu(v8u16 _1, int var) { + v8i16 res = __builtin_lsx_vslei_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vslei_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vslei_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_hu' must be a constant integer}} + return res; +} + +v4i32 vslei_wu(v4u32 _1, int var) { + v4i32 res = __builtin_lsx_vslei_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vslei_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vslei_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_wu' must be a constant integer}} + return res; +} + +v2i64 vslei_du(v2u64 _1, int var) { + v2i64 res = __builtin_lsx_vslei_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vslei_du(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vslei_du(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_du' must be a constant integer}} + return res; +} + +v16i8 vsat_b(v16i8 _1, int var) { + v16i8 res = __builtin_lsx_vsat_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __builtin_lsx_vsat_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lsx_vsat_b(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_b' must be a constant integer}} + return res; +} + +v8i16 vsat_h(v8i16 _1, int var) { + v8i16 res = __builtin_lsx_vsat_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vsat_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vsat_h(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_h' must be a constant integer}} + return res; +} + +v4i32 vsat_w(v4i32 _1, int var) { + v4i32 res = __builtin_lsx_vsat_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vsat_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vsat_w(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_w' must be a constant integer}} + return res; +} + +v2i64 vsat_d(v2i64 _1, int var) { + v2i64 res = __builtin_lsx_vsat_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vsat_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vsat_d(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_d' must be a constant integer}} + return res; +} + +v16u8 vsat_bu(v16u8 _1, int var) { + v16u8 res = __builtin_lsx_vsat_bu(_1, -1); // 
expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __builtin_lsx_vsat_bu(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lsx_vsat_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_bu' must be a constant integer}} + return res; +} + +v8u16 vsat_hu(v8u16 _1, int var) { + v8u16 res = __builtin_lsx_vsat_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vsat_hu(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vsat_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_hu' must be a constant integer}} + return res; +} + +v4u32 vsat_wu(v4u32 _1, int var) { + v4u32 res = __builtin_lsx_vsat_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vsat_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vsat_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_wu' must be a constant integer}} + return res; +} + +v2u64 vsat_du(v2u64 _1, int var) { + v2u64 res = __builtin_lsx_vsat_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vsat_du(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vsat_du(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_du' must be a constant integer}} + return res; +} + +v16i8 vreplvei_b(v16i8 _1, int var) { + v16i8 res = __builtin_lsx_vreplvei_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vreplvei_b(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vreplvei_b(_1, var); // expected-error {{argument to '__builtin_lsx_vreplvei_b' must be a constant integer}} + return res; +} + +v8i16 vreplvei_h(v8i16 _1, int var) { + v8i16 res = __builtin_lsx_vreplvei_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __builtin_lsx_vreplvei_h(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lsx_vreplvei_h(_1, var); // expected-error {{argument to '__builtin_lsx_vreplvei_h' must be a constant integer}} + return res; +} + +v4i32 vreplvei_w(v4i32 _1, int var) { + v4i32 res = __builtin_lsx_vreplvei_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} + res |= __builtin_lsx_vreplvei_w(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + res |= __builtin_lsx_vreplvei_w(_1, var); // expected-error {{argument to '__builtin_lsx_vreplvei_w' must be a constant integer}} + return res; +} + +v2i64 vreplvei_d(v2i64 _1, int var) { + v2i64 res = __builtin_lsx_vreplvei_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 1]}} + res |= __builtin_lsx_vreplvei_d(_1, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} + res |= __builtin_lsx_vreplvei_d(_1, var); // expected-error {{argument to '__builtin_lsx_vreplvei_d' must be a constant integer}} + return res; +} + +v16u8 vandi_b(v16u8 _1, int var) { + v16u8 res = __builtin_lsx_vandi_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __builtin_lsx_vandi_b(_1, 
256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __builtin_lsx_vandi_b(_1, var); // expected-error {{argument to '__builtin_lsx_vandi_b' must be a constant integer}} + return res; +} + +v16u8 vori_b(v16u8 _1, int var) { + v16u8 res = __builtin_lsx_vori_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __builtin_lsx_vori_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __builtin_lsx_vori_b(_1, var); // expected-error {{argument to '__builtin_lsx_vori_b' must be a constant integer}} + return res; +} + +v16u8 vnori_b(v16u8 _1, int var) { + v16u8 res = __builtin_lsx_vnori_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __builtin_lsx_vnori_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __builtin_lsx_vnori_b(_1, var); // expected-error {{argument to '__builtin_lsx_vnori_b' must be a constant integer}} + return res; +} + +v16u8 vxori_b(v16u8 _1, int var) { + v16u8 res = __builtin_lsx_vxori_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __builtin_lsx_vxori_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __builtin_lsx_vxori_b(_1, var); // expected-error {{argument to '__builtin_lsx_vxori_b' must be a constant integer}} + return res; +} + +v16u8 vbitseli_b(v16u8 _1, v16u8 _2, int var) { + v16u8 res = __builtin_lsx_vbitseli_b(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __builtin_lsx_vbitseli_b(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __builtin_lsx_vbitseli_b(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vbitseli_b' must be a constant integer}} + return res; +} + +v16i8 vshuf4i_b(v16i8 _1, int var) { + v16i8 res = __builtin_lsx_vshuf4i_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __builtin_lsx_vshuf4i_b(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __builtin_lsx_vshuf4i_b(_1, var); // expected-error {{argument to '__builtin_lsx_vshuf4i_b' must be a constant integer}} + return res; +} + +v8i16 vshuf4i_h(v8i16 _1, int var) { + v8i16 res = __builtin_lsx_vshuf4i_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __builtin_lsx_vshuf4i_h(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __builtin_lsx_vshuf4i_h(_1, var); // expected-error {{argument to '__builtin_lsx_vshuf4i_h' must be a constant integer}} + return res; +} + +v4i32 vshuf4i_w(v4i32 _1, int var) { + v4i32 res = __builtin_lsx_vshuf4i_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __builtin_lsx_vshuf4i_w(_1, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __builtin_lsx_vshuf4i_w(_1, var); // expected-error {{argument to '__builtin_lsx_vshuf4i_w' must be a constant integer}} + return res; +} + +int vpickve2gr_b(v16i8 _1, int var) { + int res = __builtin_lsx_vpickve2gr_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vpickve2gr_b(_1, 16); // expected-error {{argument value 16 is outside the 
valid range [0, 15]}} + res |= __builtin_lsx_vpickve2gr_b(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_b' must be a constant integer}} + return res; +} + +int vpickve2gr_h(v8i16 _1, int var) { + int res = __builtin_lsx_vpickve2gr_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __builtin_lsx_vpickve2gr_h(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lsx_vpickve2gr_h(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_h' must be a constant integer}} + return res; +} + +int vpickve2gr_w(v4i32 _1, int var) { + int res = __builtin_lsx_vpickve2gr_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} + res |= __builtin_lsx_vpickve2gr_w(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + res |= __builtin_lsx_vpickve2gr_w(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_w' must be a constant integer}} + return res; +} + +long vpickve2gr_d(v2i64 _1, int var) { + long res = __builtin_lsx_vpickve2gr_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 1]}} + res |= __builtin_lsx_vpickve2gr_d(_1, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} + res |= __builtin_lsx_vpickve2gr_d(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_d' must be a constant integer}} + return res; +} + +unsigned int vpickve2gr_bu(v16i8 _1, int var) { + unsigned int res = __builtin_lsx_vpickve2gr_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vpickve2gr_bu(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vpickve2gr_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_bu' must be a constant integer}} + return res; +} + +unsigned int vpickve2gr_hu(v8i16 _1, int var) { + unsigned int res = __builtin_lsx_vpickve2gr_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __builtin_lsx_vpickve2gr_hu(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lsx_vpickve2gr_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_hu' must be a constant integer}} + return res; +} + +unsigned int vpickve2gr_wu(v4i32 _1, int var) { + unsigned int res = __builtin_lsx_vpickve2gr_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} + res |= __builtin_lsx_vpickve2gr_wu(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + res |= __builtin_lsx_vpickve2gr_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_wu' must be a constant integer}} + return res; +} + +unsigned long int vpickve2gr_du(v2i64 _1, int var) { + unsigned long int res = __builtin_lsx_vpickve2gr_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 1]}} + res |= __builtin_lsx_vpickve2gr_du(_1, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} + res |= __builtin_lsx_vpickve2gr_du(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_du' must be a constant integer}} + return res; +} + +v16i8 vinsgr2vr_b(v16i8 _1, int var) { + v16i8 res = __builtin_lsx_vinsgr2vr_b(_1, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res 
|= __builtin_lsx_vinsgr2vr_b(_1, 1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vinsgr2vr_b(_1, 1, var); // expected-error {{argument to '__builtin_lsx_vinsgr2vr_b' must be a constant integer}} + return res; +} + +v8i16 vinsgr2vr_h(v8i16 _1, int var) { + v8i16 res = __builtin_lsx_vinsgr2vr_h(_1, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __builtin_lsx_vinsgr2vr_h(_1, 1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lsx_vinsgr2vr_h(_1, 1, var); // expected-error {{argument to '__builtin_lsx_vinsgr2vr_h' must be a constant integer}} + return res; +} + +v4i32 vinsgr2vr_w(v4i32 _1, int var) { + v4i32 res = __builtin_lsx_vinsgr2vr_w(_1, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} + res |= __builtin_lsx_vinsgr2vr_w(_1, 1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + res |= __builtin_lsx_vinsgr2vr_w(_1, 1, var); // expected-error {{argument to '__builtin_lsx_vinsgr2vr_w' must be a constant integer}} + return res; +} + +v2i64 vinsgr2vr_d(v2i64 _1, int var) { + v2i64 res = __builtin_lsx_vinsgr2vr_d(_1, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 1]}} + res |= __builtin_lsx_vinsgr2vr_d(_1, 1, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} + res |= __builtin_lsx_vinsgr2vr_d(_1, 1, var); // expected-error {{argument to '__builtin_lsx_vinsgr2vr_d' must be a constant integer}} + return res; +} + +v8i16 vsllwil_h_b(v16i8 _1, int var) { + v8i16 res = __builtin_lsx_vsllwil_h_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __builtin_lsx_vsllwil_h_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lsx_vsllwil_h_b(_1, var); // expected-error {{argument to '__builtin_lsx_vsllwil_h_b' must be a constant integer}} + return res; +} + +v4i32 vsllwil_w_h(v8i16 _1, int var) { + v4i32 res = __builtin_lsx_vsllwil_w_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vsllwil_w_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vsllwil_w_h(_1, var); // expected-error {{argument to '__builtin_lsx_vsllwil_w_h' must be a constant integer}} + return res; +} + +v2i64 vsllwil_d_w(v4i32 _1, int var) { + v2i64 res = __builtin_lsx_vsllwil_d_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vsllwil_d_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vsllwil_d_w(_1, var); // expected-error {{argument to '__builtin_lsx_vsllwil_d_w' must be a constant integer}} + return res; +} + +v8u16 vsllwil_hu_bu(v16u8 _1, int var) { + v8u16 res = __builtin_lsx_vsllwil_hu_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __builtin_lsx_vsllwil_hu_bu(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lsx_vsllwil_hu_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vsllwil_hu_bu' must be a constant integer}} + return res; +} + +v4u32 vsllwil_wu_hu(v8u16 _1, int var) { + v4u32 res = __builtin_lsx_vsllwil_wu_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the 
valid range [0, 15]}} + res |= __builtin_lsx_vsllwil_wu_hu(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vsllwil_wu_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vsllwil_wu_hu' must be a constant integer}} + return res; +} + +v2u64 vsllwil_du_wu(v4u32 _1, int var) { + v2u64 res = __builtin_lsx_vsllwil_du_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vsllwil_du_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vsllwil_du_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vsllwil_du_wu' must be a constant integer}} + return res; +} + +v16i8 vfrstpi_b(v16i8 _1, v16i8 _2, int var) { + v16i8 res = __builtin_lsx_vfrstpi_b(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vfrstpi_b(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vfrstpi_b(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vfrstpi_b' must be a constant integer}} + return res; +} + +v8i16 vfrstpi_h(v8i16 _1, v8i16 _2, int var) { + v8i16 res = __builtin_lsx_vfrstpi_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vfrstpi_h(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vfrstpi_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vfrstpi_h' must be a constant integer}} + return res; +} + +v2i64 vshuf4i_d(v2i64 _1, v2i64 _2, int var) { + v2i64 res = __builtin_lsx_vshuf4i_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __builtin_lsx_vshuf4i_d(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __builtin_lsx_vshuf4i_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vshuf4i_d' must be a constant integer}} + return res; +} + +v16i8 vbsrl_v(v16i8 _1, int var) { + v16i8 res = __builtin_lsx_vbsrl_v(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vbsrl_v(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vbsrl_v(_1, var); // expected-error {{argument to '__builtin_lsx_vbsrl_v' must be a constant integer}} + return res; +} + +v16i8 vbsll_v(v16i8 _1, int var) { + v16i8 res = __builtin_lsx_vbsll_v(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vbsll_v(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vbsll_v(_1, var); // expected-error {{argument to '__builtin_lsx_vbsll_v' must be a constant integer}} + return res; +} + +v16i8 vextrins_b(v16i8 _1, v16i8 _2, int var) { + v16i8 res = __builtin_lsx_vextrins_b(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __builtin_lsx_vextrins_b(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __builtin_lsx_vextrins_b(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vextrins_b' must be a constant integer}} + return res; +} + +v8i16 vextrins_h(v8i16 _1, v8i16 _2, int var) { + v8i16 res = __builtin_lsx_vextrins_h(_1, _2, -1); // 
expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __builtin_lsx_vextrins_h(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __builtin_lsx_vextrins_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vextrins_h' must be a constant integer}} + return res; +} + +v4i32 vextrins_w(v4i32 _1, v4i32 _2, int var) { + v4i32 res = __builtin_lsx_vextrins_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __builtin_lsx_vextrins_w(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __builtin_lsx_vextrins_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vextrins_w' must be a constant integer}} + return res; +} + +v2i64 vextrins_d(v2i64 _1, v2i64 _2, int var) { + v2i64 res = __builtin_lsx_vextrins_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __builtin_lsx_vextrins_d(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __builtin_lsx_vextrins_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vextrins_d' must be a constant integer}} + return res; +} + +void vstelm_b_idx(v16i8 _1, void *_2, int var) { + __builtin_lsx_vstelm_b(_1, _2, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + __builtin_lsx_vstelm_b(_1, _2, 1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + __builtin_lsx_vstelm_b(_1, _2, 1, var); // expected-error {{argument to '__builtin_lsx_vstelm_b' must be a constant integer}} +} + +void vstelm_h_idx(v8i16 _1, void *_2, int var) { + __builtin_lsx_vstelm_h(_1, _2, 2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + __builtin_lsx_vstelm_h(_1, _2, 2, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + __builtin_lsx_vstelm_h(_1, _2, 2, var); // expected-error {{argument to '__builtin_lsx_vstelm_h' must be a constant integer}} +} + +void vstelm_w_idx(v4i32 _1, void *_2, int var) { + __builtin_lsx_vstelm_w(_1, _2, 4, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}} + __builtin_lsx_vstelm_w(_1, _2, 4, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}} + __builtin_lsx_vstelm_w(_1, _2, 4, var); // expected-error {{argument to '__builtin_lsx_vstelm_w' must be a constant integer}} +} + +void vstelm_d_idx(v2i64 _1, void *_2, int var) { + __builtin_lsx_vstelm_d(_1, _2, 8, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 1]}} + __builtin_lsx_vstelm_d(_1, _2, 8, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} + __builtin_lsx_vstelm_d(_1, _2, 8, var); // expected-error {{argument to '__builtin_lsx_vstelm_d' must be a constant integer}} +} + +void vstelm_b(v16i8 _1, void *_2, int var) { + __builtin_lsx_vstelm_b(_1, _2, -129, 1); // expected-error {{argument value -129 is outside the valid range [-128, 127]}} + __builtin_lsx_vstelm_b(_1, _2, 128, 1); // expected-error {{argument value 128 is outside the valid range [-128, 127]}} + __builtin_lsx_vstelm_b(_1, _2, var, 1); // expected-error {{argument to '__builtin_lsx_vstelm_b' must be a constant integer}} +} + +void vstelm_h(v8i16 _1, void *_2, int var) { + __builtin_lsx_vstelm_h(_1, _2, -258, 1); // expected-error {{argument value -258 is outside the valid range 
[-256, 254]}} + __builtin_lsx_vstelm_h(_1, _2, 256, 1); // expected-error {{argument value 256 is outside the valid range [-256, 254]}} + __builtin_lsx_vstelm_h(_1, _2, var, 1); // expected-error {{argument to '__builtin_lsx_vstelm_h' must be a constant integer}} +} + +void vstelm_w(v4i32 _1, void *_2, int var) { + __builtin_lsx_vstelm_w(_1, _2, -516, 1); // expected-error {{argument value -516 is outside the valid range [-512, 508]}} + __builtin_lsx_vstelm_w(_1, _2, 512, 1); // expected-error {{argument value 512 is outside the valid range [-512, 508]}} + __builtin_lsx_vstelm_w(_1, _2, var, 1); // expected-error {{argument to '__builtin_lsx_vstelm_w' must be a constant integer}} +} + +void vstelm_d(v2i64 _1, void *_2, int var) { + __builtin_lsx_vstelm_d(_1, _2, -1032, 1); // expected-error {{argument value -1032 is outside the valid range [-1024, 1016]}} + __builtin_lsx_vstelm_d(_1, _2, 1024, 1); // expected-error {{argument value 1024 is outside the valid range [-1024, 1016]}} + __builtin_lsx_vstelm_d(_1, _2, var, 1); // expected-error {{argument to '__builtin_lsx_vstelm_d' must be a constant integer}} +} + +v16i8 vldrepl_b(void *_1, int var) { + v16i8 res = __builtin_lsx_vldrepl_b(_1, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} + res |= __builtin_lsx_vldrepl_b(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2047]}} + res |= __builtin_lsx_vldrepl_b(_1, var); // expected-error {{argument to '__builtin_lsx_vldrepl_b' must be a constant integer}} + return res; +} + +v8i16 vldrepl_h(void *_1, int var) { + v8i16 res = __builtin_lsx_vldrepl_h(_1, -2050); // expected-error {{argument value -2050 is outside the valid range [-2048, 2046]}} + res |= __builtin_lsx_vldrepl_h(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2046]}} + res |= __builtin_lsx_vldrepl_h(_1, var); // expected-error {{argument to '__builtin_lsx_vldrepl_h' must be a constant integer}} + return res; +} + +v4i32 vldrepl_w(void *_1, int var) { + v4i32 res = __builtin_lsx_vldrepl_w(_1, -2052); // expected-error {{argument value -2052 is outside the valid range [-2048, 2044]}} + res |= __builtin_lsx_vldrepl_w(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2044]}} + res |= __builtin_lsx_vldrepl_w(_1, var); // expected-error {{argument to '__builtin_lsx_vldrepl_w' must be a constant integer}} + return res; +} + +v2i64 vldrepl_d(void *_1, int var) { + v2i64 res = __builtin_lsx_vldrepl_d(_1, -2056); // expected-error {{argument value -2056 is outside the valid range [-2048, 2040]}} + res |= __builtin_lsx_vldrepl_d(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2040]}} + res |= __builtin_lsx_vldrepl_d(_1, var); // expected-error {{argument to '__builtin_lsx_vldrepl_d' must be a constant integer}} + return res; +} + +v16i8 vrotri_b(v16i8 _1, int var) { + v16i8 res = __builtin_lsx_vrotri_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}} + res |= __builtin_lsx_vrotri_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + res |= __builtin_lsx_vrotri_b(_1, var); // expected-error {{argument to '__builtin_lsx_vrotri_b' must be a constant integer}} + return res; +} + +v8i16 vrotri_h(v8i16 _1, int var) { + v8i16 res = __builtin_lsx_vrotri_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vrotri_h(_1, 16); // 
expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vrotri_h(_1, var); // expected-error {{argument to '__builtin_lsx_vrotri_h' must be a constant integer}} + return res; +} + +v4i32 vrotri_w(v4i32 _1, int var) { + v4i32 res = __builtin_lsx_vrotri_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vrotri_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vrotri_w(_1, var); // expected-error {{argument to '__builtin_lsx_vrotri_w' must be a constant integer}} + return res; +} + +v2i64 vrotri_d(v2i64 _1, int var) { + v2i64 res = __builtin_lsx_vrotri_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vrotri_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vrotri_d(_1, var); // expected-error {{argument to '__builtin_lsx_vrotri_d' must be a constant integer}} + return res; +} + +v16i8 vsrlni_b_h(v16i8 _1, v16i8 _2, int var) { + v16i8 res = __builtin_lsx_vsrlni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vsrlni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vsrlni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlni_b_h' must be a constant integer}} + return res; +} + +v8i16 vsrlni_h_w(v8i16 _1, v8i16 _2, int var) { + v8i16 res = __builtin_lsx_vsrlni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vsrlni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vsrlni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlni_h_w' must be a constant integer}} + return res; +} + +v4i32 vsrlni_w_d(v4i32 _1, v4i32 _2, int var) { + v4i32 res = __builtin_lsx_vsrlni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vsrlni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vsrlni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlni_w_d' must be a constant integer}} + return res; +} + +v2i64 vsrlni_d_q(v2i64 _1, v2i64 _2, int var) { + v2i64 res = __builtin_lsx_vsrlni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} + res |= __builtin_lsx_vsrlni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + res |= __builtin_lsx_vsrlni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlni_d_q' must be a constant integer}} + return res; +} + +v16i8 vsrlrni_b_h(v16i8 _1, v16i8 _2, int var) { + v16i8 res = __builtin_lsx_vsrlrni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vsrlrni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vsrlrni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlrni_b_h' must be a constant integer}} + return res; +} + +v8i16 vsrlrni_h_w(v8i16 _1, v8i16 _2, int var) { + v8i16 res = __builtin_lsx_vsrlrni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the 
valid range [0, 31]}} + res |= __builtin_lsx_vsrlrni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vsrlrni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlrni_h_w' must be a constant integer}} + return res; +} + +v4i32 vsrlrni_w_d(v4i32 _1, v4i32 _2, int var) { + v4i32 res = __builtin_lsx_vsrlrni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vsrlrni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vsrlrni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlrni_w_d' must be a constant integer}} + return res; +} + +v2i64 vsrlrni_d_q(v2i64 _1, v2i64 _2, int var) { + v2i64 res = __builtin_lsx_vsrlrni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} + res |= __builtin_lsx_vsrlrni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + res |= __builtin_lsx_vsrlrni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlrni_d_q' must be a constant integer}} + return res; +} + +v16i8 vssrlni_b_h(v16i8 _1, v16i8 _2, int var) { + v16i8 res = __builtin_lsx_vssrlni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vssrlni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vssrlni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_b_h' must be a constant integer}} + return res; +} + +v8i16 vssrlni_h_w(v8i16 _1, v8i16 _2, int var) { + v8i16 res = __builtin_lsx_vssrlni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vssrlni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vssrlni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_h_w' must be a constant integer}} + return res; +} + +v4i32 vssrlni_w_d(v4i32 _1, v4i32 _2, int var) { + v4i32 res = __builtin_lsx_vssrlni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vssrlni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vssrlni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_w_d' must be a constant integer}} + return res; +} + +v2i64 vssrlni_d_q(v2i64 _1, v2i64 _2, int var) { + v2i64 res = __builtin_lsx_vssrlni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} + res |= __builtin_lsx_vssrlni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + res |= __builtin_lsx_vssrlni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_d_q' must be a constant integer}} + return res; +} + +v16u8 vssrlni_bu_h(v16u8 _1, v16i8 _2, int var) { + v16u8 res = __builtin_lsx_vssrlni_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vssrlni_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vssrlni_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_bu_h' must be a constant 
integer}} + return res; +} + +v8u16 vssrlni_hu_w(v8u16 _1, v8i16 _2, int var) { + v8u16 res = __builtin_lsx_vssrlni_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vssrlni_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vssrlni_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_hu_w' must be a constant integer}} + return res; +} + +v4u32 vssrlni_wu_d(v4u32 _1, v4i32 _2, int var) { + v4u32 res = __builtin_lsx_vssrlni_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vssrlni_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vssrlni_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_wu_d' must be a constant integer}} + return res; +} + +v2u64 vssrlni_du_q(v2u64 _1, v2i64 _2, int var) { + v2u64 res = __builtin_lsx_vssrlni_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} + res |= __builtin_lsx_vssrlni_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + res |= __builtin_lsx_vssrlni_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_du_q' must be a constant integer}} + return res; +} + +v16i8 vssrlrni_b_h(v16i8 _1, v16i8 _2, int var) { + v16i8 res = __builtin_lsx_vssrlrni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vssrlrni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vssrlrni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_b_h' must be a constant integer}} + return res; +} + +v8i16 vssrlrni_h_w(v8i16 _1, v8i16 _2, int var) { + v8i16 res = __builtin_lsx_vssrlrni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vssrlrni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vssrlrni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_h_w' must be a constant integer}} + return res; +} + +v4i32 vssrlrni_w_d(v4i32 _1, v4i32 _2, int var) { + v4i32 res = __builtin_lsx_vssrlrni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vssrlrni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vssrlrni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_w_d' must be a constant integer}} + return res; +} + +v2i64 vssrlrni_d_q(v2i64 _1, v2i64 _2, int var) { + v2i64 res = __builtin_lsx_vssrlrni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} + res |= __builtin_lsx_vssrlrni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + res |= __builtin_lsx_vssrlrni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_d_q' must be a constant integer}} + return res; +} + +v16u8 vssrlrni_bu_h(v16u8 _1, v16i8 _2, int var) { + v16u8 res = __builtin_lsx_vssrlrni_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= 
__builtin_lsx_vssrlrni_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vssrlrni_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_bu_h' must be a constant integer}} + return res; +} + +v8u16 vssrlrni_hu_w(v8u16 _1, v8i16 _2, int var) { + v8u16 res = __builtin_lsx_vssrlrni_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vssrlrni_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vssrlrni_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_hu_w' must be a constant integer}} + return res; +} + +v4u32 vssrlrni_wu_d(v4u32 _1, v4i32 _2, int var) { + v4u32 res = __builtin_lsx_vssrlrni_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vssrlrni_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vssrlrni_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_wu_d' must be a constant integer}} + return res; +} + +v2u64 vssrlrni_du_q(v2u64 _1, v2i64 _2, int var) { + v2u64 res = __builtin_lsx_vssrlrni_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} + res |= __builtin_lsx_vssrlrni_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + res |= __builtin_lsx_vssrlrni_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_du_q' must be a constant integer}} + return res; +} + +v16i8 vsrani_b_h(v16i8 _1, v16i8 _2, int var) { + v16i8 res = __builtin_lsx_vsrani_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vsrani_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vsrani_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrani_b_h' must be a constant integer}} + return res; +} + +v8i16 vsrani_h_w(v8i16 _1, v8i16 _2, int var) { + v8i16 res = __builtin_lsx_vsrani_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vsrani_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vsrani_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrani_h_w' must be a constant integer}} + return res; +} + +v4i32 vsrani_w_d(v4i32 _1, v4i32 _2, int var) { + v4i32 res = __builtin_lsx_vsrani_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vsrani_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vsrani_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrani_w_d' must be a constant integer}} + return res; +} + +v2i64 vsrani_d_q(v2i64 _1, v2i64 _2, int var) { + v2i64 res = __builtin_lsx_vsrani_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} + res |= __builtin_lsx_vsrani_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + res |= __builtin_lsx_vsrani_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrani_d_q' must be a constant integer}} + return res; +} 
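+
+// Note: for the narrowing shifts (vsrlni, vsrlrni, vssrlni, vssrlrni, vsrani,
+// vsrarni, vssrani and vssrarni), the shift amount applies to the
+// double-width source element, so the valid immediate range doubles with
+// each suffix: [0, 15] for *_b_h, [0, 31] for *_h_w, [0, 63] for *_w_d and
+// [0, 127] for *_d_q.
+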
+ +v16i8 vsrarni_b_h(v16i8 _1, v16i8 _2, int var) { + v16i8 res = __builtin_lsx_vsrarni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vsrarni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vsrarni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrarni_b_h' must be a constant integer}} + return res; +} + +v8i16 vsrarni_h_w(v8i16 _1, v8i16 _2, int var) { + v8i16 res = __builtin_lsx_vsrarni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vsrarni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vsrarni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrarni_h_w' must be a constant integer}} + return res; +} + +v4i32 vsrarni_w_d(v4i32 _1, v4i32 _2, int var) { + v4i32 res = __builtin_lsx_vsrarni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vsrarni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vsrarni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrarni_w_d' must be a constant integer}} + return res; +} + +v2i64 vsrarni_d_q(v2i64 _1, v2i64 _2, int var) { + v2i64 res = __builtin_lsx_vsrarni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} + res |= __builtin_lsx_vsrarni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + res |= __builtin_lsx_vsrarni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrarni_d_q' must be a constant integer}} + return res; +} + +v16i8 vssrani_b_h(v16i8 _1, v16i8 _2, int var) { + v16i8 res = __builtin_lsx_vssrani_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vssrani_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vssrani_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_b_h' must be a constant integer}} + return res; +} + +v8i16 vssrani_h_w(v8i16 _1, v8i16 _2, int var) { + v8i16 res = __builtin_lsx_vssrani_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vssrani_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vssrani_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_h_w' must be a constant integer}} + return res; +} + +v4i32 vssrani_w_d(v4i32 _1, v4i32 _2, int var) { + v4i32 res = __builtin_lsx_vssrani_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vssrani_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vssrani_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_w_d' must be a constant integer}} + return res; +} + +v2i64 vssrani_d_q(v2i64 _1, v2i64 _2, int var) { + v2i64 res = __builtin_lsx_vssrani_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} + res |= __builtin_lsx_vssrani_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside 
the valid range [0, 127]}} + res |= __builtin_lsx_vssrani_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_d_q' must be a constant integer}} + return res; +} + +v16u8 vssrani_bu_h(v16u8 _1, v16i8 _2, int var) { + v16u8 res = __builtin_lsx_vssrani_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vssrani_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vssrani_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_bu_h' must be a constant integer}} + return res; +} + +v8u16 vssrani_hu_w(v8u16 _1, v8i16 _2, int var) { + v8u16 res = __builtin_lsx_vssrani_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vssrani_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vssrani_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_hu_w' must be a constant integer}} + return res; +} + +v4u32 vssrani_wu_d(v4u32 _1, v4i32 _2, int var) { + v4u32 res = __builtin_lsx_vssrani_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vssrani_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vssrani_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_wu_d' must be a constant integer}} + return res; +} + +v2u64 vssrani_du_q(v2u64 _1, v2i64 _2, int var) { + v2u64 res = __builtin_lsx_vssrani_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} + res |= __builtin_lsx_vssrani_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + res |= __builtin_lsx_vssrani_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_du_q' must be a constant integer}} + return res; +} + +v16i8 vssrarni_b_h(v16i8 _1, v16i8 _2, int var) { + v16i8 res = __builtin_lsx_vssrarni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vssrarni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vssrarni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_b_h' must be a constant integer}} + return res; +} + +v8i16 vssrarni_h_w(v8i16 _1, v8i16 _2, int var) { + v8i16 res = __builtin_lsx_vssrarni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vssrarni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vssrarni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_h_w' must be a constant integer}} + return res; +} + +v4i32 vssrarni_w_d(v4i32 _1, v4i32 _2, int var) { + v4i32 res = __builtin_lsx_vssrarni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vssrarni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vssrarni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_w_d' must be a constant integer}} + return res; +} + +v2i64 vssrarni_d_q(v2i64 _1, v2i64 _2, int var) { + v2i64 res = 
__builtin_lsx_vssrarni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} + res |= __builtin_lsx_vssrarni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + res |= __builtin_lsx_vssrarni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_d_q' must be a constant integer}} + return res; +} + +v16u8 vssrarni_bu_h(v16u8 _1, v16i8 _2, int var) { + v16u8 res = __builtin_lsx_vssrarni_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vssrarni_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + res |= __builtin_lsx_vssrarni_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_bu_h' must be a constant integer}} + return res; +} + +v8u16 vssrarni_hu_w(v8u16 _1, v8i16 _2, int var) { + v8u16 res = __builtin_lsx_vssrarni_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vssrarni_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + res |= __builtin_lsx_vssrarni_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_hu_w' must be a constant integer}} + return res; +} + +v4u32 vssrarni_wu_d(v4u32 _1, v4i32 _2, int var) { + v4u32 res = __builtin_lsx_vssrarni_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vssrarni_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}} + res |= __builtin_lsx_vssrarni_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_wu_d' must be a constant integer}} + return res; +} + +v2u64 vssrarni_du_q(v2u64 _1, v2i64 _2, int var) { + v2u64 res = __builtin_lsx_vssrarni_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 127]}} + res |= __builtin_lsx_vssrarni_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range [0, 127]}} + res |= __builtin_lsx_vssrarni_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_du_q' must be a constant integer}} + return res; +} + +v4i32 vpermi_w(v4i32 _1, v4i32 _2, int var) { + v4i32 res = __builtin_lsx_vpermi_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 255]}} + res |= __builtin_lsx_vpermi_w(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range [0, 255]}} + res |= __builtin_lsx_vpermi_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vpermi_w' must be a constant integer}} + return res; +} + +v16i8 vld(void *_1, int var) { + v16i8 res = __builtin_lsx_vld(_1, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} + res |= __builtin_lsx_vld(_1, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2047]}} + res |= __builtin_lsx_vld(_1, var); // expected-error {{argument to '__builtin_lsx_vld' must be a constant integer}} + return res; +} + +void vst(v16i8 _1, void *_2, int var) { + __builtin_lsx_vst(_1, _2, -2049); // expected-error {{argument value -2049 is outside the valid range [-2048, 2047]}} + __builtin_lsx_vst(_1, _2, 2048); // expected-error {{argument value 2048 is outside the valid range [-2048, 2047]}} + __builtin_lsx_vst(_1, _2, var); // expected-error {{argument to 
'__builtin_lsx_vst' must be a constant integer}} +} + +v2i64 vldi(int var) { + v2i64 res = __builtin_lsx_vldi(-4097); // expected-error {{argument value -4097 is outside the valid range [-4096, 4095]}} + res |= __builtin_lsx_vldi(4096); // expected-error {{argument value 4096 is outside the valid range [-4096, 4095]}} + res |= __builtin_lsx_vldi(var); // expected-error {{argument to '__builtin_lsx_vldi' must be a constant integer}} + return res; +} + +v16i8 vrepli_b(int var) { + v16i8 res = __builtin_lsx_vrepli_b(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} + res |= __builtin_lsx_vrepli_b(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}} + res |= __builtin_lsx_vrepli_b(var); // expected-error {{argument to '__builtin_lsx_vrepli_b' must be a constant integer}} + return res; +} + +v2i64 vrepli_d(int var) { + v2i64 res = __builtin_lsx_vrepli_d(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} + res |= __builtin_lsx_vrepli_d(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}} + res |= __builtin_lsx_vrepli_d(var); // expected-error {{argument to '__builtin_lsx_vrepli_d' must be a constant integer}} + return res; +} + +v8i16 vrepli_h(int var) { + v8i16 res = __builtin_lsx_vrepli_h(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} + res |= __builtin_lsx_vrepli_h(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}} + res |= __builtin_lsx_vrepli_h(var); // expected-error {{argument to '__builtin_lsx_vrepli_h' must be a constant integer}} + return res; +} + +v4i32 vrepli_w(int var) { + v4i32 res = __builtin_lsx_vrepli_w(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}} + res |= __builtin_lsx_vrepli_w(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}} + res |= __builtin_lsx_vrepli_w(var); // expected-error {{argument to '__builtin_lsx_vrepli_w' must be a constant integer}} + return res; +} diff --git a/clang/test/CodeGen/LoongArch/lsx/builtin.c b/clang/test/CodeGen/LoongArch/lsx/builtin.c new file mode 100644 index 0000000000000000000000000000000000000000..05a3d13a7fb9aeb9eb089f9ad49167a31e633754 --- /dev/null +++ b/clang/test/CodeGen/LoongArch/lsx/builtin.c @@ -0,0 +1,7101 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -triple loongarch64 -target-feature +lsx -O2 -emit-llvm %s -o - | FileCheck %s + +typedef signed char v16i8 __attribute__ ((vector_size(16), aligned(16))); +typedef signed char v16i8_b __attribute__ ((vector_size(16), aligned(1))); +typedef unsigned char v16u8 __attribute__ ((vector_size(16), aligned(16))); +typedef unsigned char v16u8_b __attribute__ ((vector_size(16), aligned(1))); +typedef short v8i16 __attribute__ ((vector_size(16), aligned(16))); +typedef short v8i16_h __attribute__ ((vector_size(16), aligned(2))); +typedef unsigned short v8u16 __attribute__ ((vector_size(16), aligned(16))); +typedef unsigned short v8u16_h __attribute__ ((vector_size(16), aligned(2))); +typedef int v4i32 __attribute__ ((vector_size(16), aligned(16))); +typedef int v4i32_w __attribute__ ((vector_size(16), aligned(4))); +typedef unsigned int v4u32 __attribute__ ((vector_size(16), aligned(16))); +typedef unsigned int v4u32_w __attribute__ ((vector_size(16), aligned(4))); +typedef long long v2i64 __attribute__ ((vector_size(16), aligned(16))); +typedef long 
long v2i64_d __attribute__ ((vector_size(16), aligned(8))); +typedef unsigned long long v2u64 __attribute__ ((vector_size(16), aligned(16))); +typedef unsigned long long v2u64_d __attribute__ ((vector_size(16), aligned(8))); +typedef float v4f32 __attribute__ ((vector_size(16), aligned(16))); +typedef float v4f32_w __attribute__ ((vector_size(16), aligned(4))); +typedef double v2f64 __attribute__ ((vector_size(16), aligned(16))); +typedef double v2f64_d __attribute__ ((vector_size(16), aligned(8))); + +typedef long long __m128i __attribute__ ((__vector_size__ (16), __may_alias__)); +typedef float __m128 __attribute__ ((__vector_size__ (16), __may_alias__)); +typedef double __m128d __attribute__ ((__vector_size__ (16), __may_alias__)); + + +// CHECK-LABEL: @vsll_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsll.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vsll_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vsll_b(_1, _2); } +// CHECK-LABEL: @vsll_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsll.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vsll_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vsll_h(_1, _2); } +// CHECK-LABEL: @vsll_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsll.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vsll_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vsll_w(_1, _2); } +// CHECK-LABEL: @vsll_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsll.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vsll_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vsll_d(_1, _2); } +// CHECK-LABEL: @vslli_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslli.b(<16 x i8> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v16i8 vslli_b(v16i8 _1) { return __builtin_lsx_vslli_b(_1, 1); } +// CHECK-LABEL: @vslli_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslli.h(<8 x i16> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v8i16 vslli_h(v8i16 _1) { return __builtin_lsx_vslli_h(_1, 1); } +// CHECK-LABEL: 
@vslli_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslli.w(<4 x i32> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4i32 vslli_w(v4i32 _1) { return __builtin_lsx_vslli_w(_1, 1); } +// CHECK-LABEL: @vslli_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslli.d(<2 x i64> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2i64 vslli_d(v2i64 _1) { return __builtin_lsx_vslli_d(_1, 1); } +// CHECK-LABEL: @vsra_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsra.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vsra_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vsra_b(_1, _2); } +// CHECK-LABEL: @vsra_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsra.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vsra_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vsra_h(_1, _2); } +// CHECK-LABEL: @vsra_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsra.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vsra_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vsra_w(_1, _2); } +// CHECK-LABEL: @vsra_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsra.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vsra_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vsra_d(_1, _2); } +// CHECK-LABEL: @vsrai_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrai.b(<16 x i8> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v16i8 vsrai_b(v16i8 _1) { return __builtin_lsx_vsrai_b(_1, 1); } +// CHECK-LABEL: @vsrai_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrai.h(<8 x i16> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v8i16 vsrai_h(v8i16 _1) { return 
__builtin_lsx_vsrai_h(_1, 1); }
+// CHECK-LABEL: @vsrai_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrai.w(<4 x i32> [[TMP0]], i32 1)
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v4i32 vsrai_w(v4i32 _1) { return __builtin_lsx_vsrai_w(_1, 1); }
+// CHECK-LABEL: @vsrai_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrai.d(<2 x i64> [[TMP0]], i32 1)
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2i64 vsrai_d(v2i64 _1) { return __builtin_lsx_vsrai_d(_1, 1); }
+// CHECK-LABEL: @vsrar_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrar.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v16i8 vsrar_b(v16i8 _1, v16i8 _2) {
+  return __builtin_lsx_vsrar_b(_1, _2);
+}
+// CHECK-LABEL: @vsrar_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrar.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8i16 vsrar_h(v8i16 _1, v8i16 _2) {
+  return __builtin_lsx_vsrar_h(_1, _2);
+}
+// CHECK-LABEL: @vsrar_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrar.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vsrar_w(v4i32 _1, v4i32 _2) {
+  return __builtin_lsx_vsrar_w(_1, _2);
+}
+// CHECK-LABEL: @vsrar_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrar.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vsrar_d(v2i64 _1, v2i64 _2) {
+  return __builtin_lsx_vsrar_d(_1, _2);
+}
+// CHECK-LABEL: @vsrari_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrari.b(<16 x i8> [[TMP0]], i32 1)
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v16i8 vsrari_b(v16i8 _1) { return __builtin_lsx_vsrari_b(_1, 1); }
+// CHECK-LABEL: @vsrari_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrari.h(<8 x i16> [[TMP0]], i32 1)
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v8i16 vsrari_h(v8i16 _1) { return __builtin_lsx_vsrari_h(_1, 1); }
+// CHECK-LABEL: @vsrari_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrari.w(<4 x i32> [[TMP0]], i32 1)
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v4i32 vsrari_w(v4i32 _1) { return __builtin_lsx_vsrari_w(_1, 1); }
+// CHECK-LABEL: @vsrari_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrari.d(<2 x i64> [[TMP0]], i32 1)
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2i64 vsrari_d(v2i64 _1) { return __builtin_lsx_vsrari_d(_1, 1); }
+// CHECK-LABEL: @vsrl_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrl.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v16i8 vsrl_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vsrl_b(_1, _2); }
+// CHECK-LABEL: @vsrl_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrl.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8i16 vsrl_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vsrl_h(_1, _2); }
+// CHECK-LABEL: @vsrl_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrl.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vsrl_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vsrl_w(_1, _2); }
+// CHECK-LABEL: @vsrl_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrl.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vsrl_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vsrl_d(_1, _2); }
+// CHECK-LABEL: @vsrli_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrli.b(<16 x i8> [[TMP0]], i32 1)
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v16i8 vsrli_b(v16i8 _1) { return __builtin_lsx_vsrli_b(_1, 1); }
+// CHECK-LABEL: @vsrli_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrli.h(<8 x i16> [[TMP0]], i32 1)
+// CHECK-NEXT:
[[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v8i16 vsrli_h(v8i16 _1) { return __builtin_lsx_vsrli_h(_1, 1); } +// CHECK-LABEL: @vsrli_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrli.w(<4 x i32> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4i32 vsrli_w(v4i32 _1) { return __builtin_lsx_vsrli_w(_1, 1); } +// CHECK-LABEL: @vsrli_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrli.d(<2 x i64> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2i64 vsrli_d(v2i64 _1) { return __builtin_lsx_vsrli_d(_1, 1); } +// CHECK-LABEL: @vsrlr_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlr.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vsrlr_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vsrlr_b(_1, _2); +} +// CHECK-LABEL: @vsrlr_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlr.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vsrlr_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vsrlr_h(_1, _2); +} +// CHECK-LABEL: @vsrlr_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlr.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vsrlr_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vsrlr_w(_1, _2); +} +// CHECK-LABEL: @vsrlr_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlr.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vsrlr_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vsrlr_d(_1, _2); +} +// CHECK-LABEL: @vsrlri_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlri.b(<16 x i8> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v16i8 vsrlri_b(v16i8 _1) { return __builtin_lsx_vsrlri_b(_1, 1); } +// CHECK-LABEL: @vsrlri_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> 
@llvm.loongarch.lsx.vsrlri.h(<8 x i16> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v8i16 vsrlri_h(v8i16 _1) { return __builtin_lsx_vsrlri_h(_1, 1); } +// CHECK-LABEL: @vsrlri_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlri.w(<4 x i32> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4i32 vsrlri_w(v4i32 _1) { return __builtin_lsx_vsrlri_w(_1, 1); } +// CHECK-LABEL: @vsrlri_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlri.d(<2 x i64> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2i64 vsrlri_d(v2i64 _1) { return __builtin_lsx_vsrlri_d(_1, 1); } +// CHECK-LABEL: @vbitclr_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitclr.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16u8 vbitclr_b(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vbitclr_b(_1, _2); +} +// CHECK-LABEL: @vbitclr_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitclr.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8u16 vbitclr_h(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vbitclr_h(_1, _2); +} +// CHECK-LABEL: @vbitclr_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitclr.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4u32 vbitclr_w(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vbitclr_w(_1, _2); +} +// CHECK-LABEL: @vbitclr_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitclr.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2u64 vbitclr_d(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vbitclr_d(_1, _2); +} +// CHECK-LABEL: @vbitclri_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitclri.b(<16 x i8> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v16u8 vbitclri_b(v16u8 _1) { return __builtin_lsx_vbitclri_b(_1, 1); } +// CHECK-LABEL: @vbitclri_h( +// CHECK-NEXT: entry: +// 
CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitclri.h(<8 x i16> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v8u16 vbitclri_h(v8u16 _1) { return __builtin_lsx_vbitclri_h(_1, 1); } +// CHECK-LABEL: @vbitclri_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitclri.w(<4 x i32> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4u32 vbitclri_w(v4u32 _1) { return __builtin_lsx_vbitclri_w(_1, 1); } +// CHECK-LABEL: @vbitclri_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitclri.d(<2 x i64> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2u64 vbitclri_d(v2u64 _1) { return __builtin_lsx_vbitclri_d(_1, 1); } +// CHECK-LABEL: @vbitset_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitset.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16u8 vbitset_b(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vbitset_b(_1, _2); +} +// CHECK-LABEL: @vbitset_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitset.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8u16 vbitset_h(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vbitset_h(_1, _2); +} +// CHECK-LABEL: @vbitset_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitset.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4u32 vbitset_w(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vbitset_w(_1, _2); +} +// CHECK-LABEL: @vbitset_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitset.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2u64 vbitset_d(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vbitset_d(_1, _2); +} +// CHECK-LABEL: @vbitseti_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitseti.b(<16 x i8> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 
[[TMP2]] +// +v16u8 vbitseti_b(v16u8 _1) { return __builtin_lsx_vbitseti_b(_1, 1); } +// CHECK-LABEL: @vbitseti_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitseti.h(<8 x i16> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v8u16 vbitseti_h(v8u16 _1) { return __builtin_lsx_vbitseti_h(_1, 1); } +// CHECK-LABEL: @vbitseti_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitseti.w(<4 x i32> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4u32 vbitseti_w(v4u32 _1) { return __builtin_lsx_vbitseti_w(_1, 1); } +// CHECK-LABEL: @vbitseti_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitseti.d(<2 x i64> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2u64 vbitseti_d(v2u64 _1) { return __builtin_lsx_vbitseti_d(_1, 1); } +// CHECK-LABEL: @vbitrev_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitrev.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16u8 vbitrev_b(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vbitrev_b(_1, _2); +} +// CHECK-LABEL: @vbitrev_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitrev.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8u16 vbitrev_h(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vbitrev_h(_1, _2); +} +// CHECK-LABEL: @vbitrev_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitrev.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4u32 vbitrev_w(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vbitrev_w(_1, _2); +} +// CHECK-LABEL: @vbitrev_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitrev.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2u64 vbitrev_d(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vbitrev_d(_1, _2); +} +// CHECK-LABEL: @vbitrevi_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> 
@llvm.loongarch.lsx.vbitrevi.b(<16 x i8> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v16u8 vbitrevi_b(v16u8 _1) { return __builtin_lsx_vbitrevi_b(_1, 1); } +// CHECK-LABEL: @vbitrevi_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vbitrevi.h(<8 x i16> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v8u16 vbitrevi_h(v8u16 _1) { return __builtin_lsx_vbitrevi_h(_1, 1); } +// CHECK-LABEL: @vbitrevi_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vbitrevi.w(<4 x i32> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4u32 vbitrevi_w(v4u32 _1) { return __builtin_lsx_vbitrevi_w(_1, 1); } +// CHECK-LABEL: @vbitrevi_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vbitrevi.d(<2 x i64> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2u64 vbitrevi_d(v2u64 _1) { return __builtin_lsx_vbitrevi_d(_1, 1); } +// CHECK-LABEL: @vadd_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vadd.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vadd_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vadd_b(_1, _2); } +// CHECK-LABEL: @vadd_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vadd.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vadd_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vadd_h(_1, _2); } +// CHECK-LABEL: @vadd_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vadd.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vadd_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vadd_w(_1, _2); } +// CHECK-LABEL: @vadd_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vadd.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vadd_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vadd_d(_1, _2); } +// CHECK-LABEL: @vaddi_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 
[[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vaddi.bu(<16 x i8> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v16i8 vaddi_bu(v16i8 _1) { return __builtin_lsx_vaddi_bu(_1, 1); } +// CHECK-LABEL: @vaddi_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddi.hu(<8 x i16> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v8i16 vaddi_hu(v8i16 _1) { return __builtin_lsx_vaddi_hu(_1, 1); } +// CHECK-LABEL: @vaddi_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddi.wu(<4 x i32> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4i32 vaddi_wu(v4i32 _1) { return __builtin_lsx_vaddi_wu(_1, 1); } +// CHECK-LABEL: @vaddi_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddi.du(<2 x i64> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2i64 vaddi_du(v2i64 _1) { return __builtin_lsx_vaddi_du(_1, 1); } +// CHECK-LABEL: @vsub_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsub.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vsub_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vsub_b(_1, _2); } +// CHECK-LABEL: @vsub_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsub.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vsub_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vsub_h(_1, _2); } +// CHECK-LABEL: @vsub_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsub.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vsub_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vsub_w(_1, _2); } +// CHECK-LABEL: @vsub_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsub.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vsub_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vsub_d(_1, _2); } +// CHECK-LABEL: @vsubi_bu( +// CHECK-NEXT: 
entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsubi.bu(<16 x i8> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v16i8 vsubi_bu(v16i8 _1) { return __builtin_lsx_vsubi_bu(_1, 1); } +// CHECK-LABEL: @vsubi_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubi.hu(<8 x i16> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v8i16 vsubi_hu(v8i16 _1) { return __builtin_lsx_vsubi_hu(_1, 1); } +// CHECK-LABEL: @vsubi_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubi.wu(<4 x i32> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4i32 vsubi_wu(v4i32 _1) { return __builtin_lsx_vsubi_wu(_1, 1); } +// CHECK-LABEL: @vsubi_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubi.du(<2 x i64> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2i64 vsubi_du(v2i64 _1) { return __builtin_lsx_vsubi_du(_1, 1); } +// CHECK-LABEL: @vmax_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmax.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vmax_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vmax_b(_1, _2); } +// CHECK-LABEL: @vmax_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmax.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vmax_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vmax_h(_1, _2); } +// CHECK-LABEL: @vmax_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmax.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vmax_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vmax_w(_1, _2); } +// CHECK-LABEL: @vmax_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmax.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vmax_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vmax_d(_1, 
_2); } +// CHECK-LABEL: @vmaxi_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmaxi.b(<16 x i8> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v16i8 vmaxi_b(v16i8 _1) { return __builtin_lsx_vmaxi_b(_1, 1); } +// CHECK-LABEL: @vmaxi_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaxi.h(<8 x i16> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v8i16 vmaxi_h(v8i16 _1) { return __builtin_lsx_vmaxi_h(_1, 1); } +// CHECK-LABEL: @vmaxi_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaxi.w(<4 x i32> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4i32 vmaxi_w(v4i32 _1) { return __builtin_lsx_vmaxi_w(_1, 1); } +// CHECK-LABEL: @vmaxi_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaxi.d(<2 x i64> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2i64 vmaxi_d(v2i64 _1) { return __builtin_lsx_vmaxi_d(_1, 1); } +// CHECK-LABEL: @vmax_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmax.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16u8 vmax_bu(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vmax_bu(_1, _2); +} +// CHECK-LABEL: @vmax_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmax.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8u16 vmax_hu(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vmax_hu(_1, _2); +} +// CHECK-LABEL: @vmax_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmax.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4u32 vmax_wu(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vmax_wu(_1, _2); +} +// CHECK-LABEL: @vmax_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmax.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2u64 
vmax_du(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vmax_du(_1, _2); +} +// CHECK-LABEL: @vmaxi_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmaxi.bu(<16 x i8> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v16u8 vmaxi_bu(v16u8 _1) { return __builtin_lsx_vmaxi_bu(_1, 1); } +// CHECK-LABEL: @vmaxi_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaxi.hu(<8 x i16> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v8u16 vmaxi_hu(v8u16 _1) { return __builtin_lsx_vmaxi_hu(_1, 1); } +// CHECK-LABEL: @vmaxi_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaxi.wu(<4 x i32> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4u32 vmaxi_wu(v4u32 _1) { return __builtin_lsx_vmaxi_wu(_1, 1); } +// CHECK-LABEL: @vmaxi_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaxi.du(<2 x i64> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2u64 vmaxi_du(v2u64 _1) { return __builtin_lsx_vmaxi_du(_1, 1); } +// CHECK-LABEL: @vmin_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmin.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vmin_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vmin_b(_1, _2); } +// CHECK-LABEL: @vmin_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmin.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vmin_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vmin_h(_1, _2); } +// CHECK-LABEL: @vmin_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmin.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vmin_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vmin_w(_1, _2); } +// CHECK-LABEL: @vmin_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmin.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> 
[[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vmin_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vmin_d(_1, _2); } +// CHECK-LABEL: @vmini_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmini.b(<16 x i8> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v16i8 vmini_b(v16i8 _1) { return __builtin_lsx_vmini_b(_1, 1); } +// CHECK-LABEL: @vmini_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmini.h(<8 x i16> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v8i16 vmini_h(v8i16 _1) { return __builtin_lsx_vmini_h(_1, 1); } +// CHECK-LABEL: @vmini_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmini.w(<4 x i32> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4i32 vmini_w(v4i32 _1) { return __builtin_lsx_vmini_w(_1, 1); } +// CHECK-LABEL: @vmini_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmini.d(<2 x i64> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2i64 vmini_d(v2i64 _1) { return __builtin_lsx_vmini_d(_1, 1); } +// CHECK-LABEL: @vmin_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmin.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16u8 vmin_bu(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vmin_bu(_1, _2); +} +// CHECK-LABEL: @vmin_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmin.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8u16 vmin_hu(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vmin_hu(_1, _2); +} +// CHECK-LABEL: @vmin_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmin.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4u32 vmin_wu(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vmin_wu(_1, _2); +} +// CHECK-LABEL: @vmin_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmin.du(<2 x i64> [[TMP0]], <2 x i64> 
[[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2u64 vmin_du(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vmin_du(_1, _2); +} +// CHECK-LABEL: @vmini_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmini.bu(<16 x i8> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v16u8 vmini_bu(v16u8 _1) { return __builtin_lsx_vmini_bu(_1, 1); } +// CHECK-LABEL: @vmini_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmini.hu(<8 x i16> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v8u16 vmini_hu(v8u16 _1) { return __builtin_lsx_vmini_hu(_1, 1); } +// CHECK-LABEL: @vmini_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmini.wu(<4 x i32> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4u32 vmini_wu(v4u32 _1) { return __builtin_lsx_vmini_wu(_1, 1); } +// CHECK-LABEL: @vmini_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmini.du(<2 x i64> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2u64 vmini_du(v2u64 _1) { return __builtin_lsx_vmini_du(_1, 1); } +// CHECK-LABEL: @vseq_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vseq.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vseq_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vseq_b(_1, _2); } +// CHECK-LABEL: @vseq_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vseq.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vseq_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vseq_h(_1, _2); } +// CHECK-LABEL: @vseq_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vseq.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vseq_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vseq_w(_1, _2); } +// CHECK-LABEL: @vseq_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> 
@llvm.loongarch.lsx.vseq.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vseq_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vseq_d(_1, _2); } +// CHECK-LABEL: @vseqi_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vseqi.b(<16 x i8> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v16i8 vseqi_b(v16i8 _1) { return __builtin_lsx_vseqi_b(_1, 1); } +// CHECK-LABEL: @vseqi_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vseqi.h(<8 x i16> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v8i16 vseqi_h(v8i16 _1) { return __builtin_lsx_vseqi_h(_1, 1); } +// CHECK-LABEL: @vseqi_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vseqi.w(<4 x i32> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4i32 vseqi_w(v4i32 _1) { return __builtin_lsx_vseqi_w(_1, 1); } +// CHECK-LABEL: @vseqi_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vseqi.d(<2 x i64> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2i64 vseqi_d(v2i64 _1) { return __builtin_lsx_vseqi_d(_1, 1); } +// CHECK-LABEL: @vslti_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslti.b(<16 x i8> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v16i8 vslti_b(v16i8 _1) { return __builtin_lsx_vslti_b(_1, 1); } +// CHECK-LABEL: @vslt_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslt.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vslt_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vslt_b(_1, _2); } +// CHECK-LABEL: @vslt_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslt.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vslt_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vslt_h(_1, _2); } +// CHECK-LABEL: @vslt_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslt.w(<4 x i32> [[TMP0]], <4 x i32> 
[[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vslt_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vslt_w(_1, _2); } +// CHECK-LABEL: @vslt_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslt.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vslt_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vslt_d(_1, _2); } +// CHECK-LABEL: @vslti_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslti.h(<8 x i16> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v8i16 vslti_h(v8i16 _1) { return __builtin_lsx_vslti_h(_1, 1); } +// CHECK-LABEL: @vslti_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslti.w(<4 x i32> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4i32 vslti_w(v4i32 _1) { return __builtin_lsx_vslti_w(_1, 1); } +// CHECK-LABEL: @vslti_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslti.d(<2 x i64> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2i64 vslti_d(v2i64 _1) { return __builtin_lsx_vslti_d(_1, 1); } +// CHECK-LABEL: @vslt_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslt.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vslt_bu(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vslt_bu(_1, _2); +} +// CHECK-LABEL: @vslt_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslt.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vslt_hu(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vslt_hu(_1, _2); +} +// CHECK-LABEL: @vslt_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslt.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vslt_wu(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vslt_wu(_1, _2); +} +// CHECK-LABEL: @vslt_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = 
bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslt.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vslt_du(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vslt_du(_1, _2); +} +// CHECK-LABEL: @vslti_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslti.bu(<16 x i8> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v16i8 vslti_bu(v16u8 _1) { return __builtin_lsx_vslti_bu(_1, 1); } +// CHECK-LABEL: @vslti_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslti.hu(<8 x i16> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v8i16 vslti_hu(v8u16 _1) { return __builtin_lsx_vslti_hu(_1, 1); } +// CHECK-LABEL: @vslti_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslti.wu(<4 x i32> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4i32 vslti_wu(v4u32 _1) { return __builtin_lsx_vslti_wu(_1, 1); } +// CHECK-LABEL: @vslti_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslti.du(<2 x i64> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2i64 vslti_du(v2u64 _1) { return __builtin_lsx_vslti_du(_1, 1); } +// CHECK-LABEL: @vsle_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsle.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vsle_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vsle_b(_1, _2); } +// CHECK-LABEL: @vsle_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsle.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vsle_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vsle_h(_1, _2); } +// CHECK-LABEL: @vsle_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsle.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vsle_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vsle_w(_1, _2); } +// CHECK-LABEL: @vsle_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 
[[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsle.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vsle_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vsle_d(_1, _2); } +// CHECK-LABEL: @vslei_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslei.b(<16 x i8> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v16i8 vslei_b(v16i8 _1) { return __builtin_lsx_vslei_b(_1, 1); } +// CHECK-LABEL: @vslei_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslei.h(<8 x i16> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v8i16 vslei_h(v8i16 _1) { return __builtin_lsx_vslei_h(_1, 1); } +// CHECK-LABEL: @vslei_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslei.w(<4 x i32> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4i32 vslei_w(v4i32 _1) { return __builtin_lsx_vslei_w(_1, 1); } +// CHECK-LABEL: @vslei_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslei.d(<2 x i64> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2i64 vslei_d(v2i64 _1) { return __builtin_lsx_vslei_d(_1, 1); } +// CHECK-LABEL: @vsle_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsle.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vsle_bu(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vsle_bu(_1, _2); +} +// CHECK-LABEL: @vsle_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsle.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vsle_hu(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vsle_hu(_1, _2); +} +// CHECK-LABEL: @vsle_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsle.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vsle_wu(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vsle_wu(_1, _2); +} +// CHECK-LABEL: @vsle_du( +// 
CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsle.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v2i64 vsle_du(v2u64 _1, v2u64 _2) {
+  return __builtin_lsx_vsle_du(_1, _2);
+}
+// CHECK-LABEL: @vslei_bu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vslei.bu(<16 x i8> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v16i8 vslei_bu(v16u8 _1) { return __builtin_lsx_vslei_bu(_1, 1); }
+// CHECK-LABEL: @vslei_hu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vslei.hu(<8 x i16> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v8i16 vslei_hu(v8u16 _1) { return __builtin_lsx_vslei_hu(_1, 1); }
+// CHECK-LABEL: @vslei_wu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vslei.wu(<4 x i32> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v4i32 vslei_wu(v4u32 _1) { return __builtin_lsx_vslei_wu(_1, 1); }
+// CHECK-LABEL: @vslei_du(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vslei.du(<2 x i64> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v2i64 vslei_du(v2u64 _1) { return __builtin_lsx_vslei_du(_1, 1); }
+// CHECK-LABEL: @vsat_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsat.b(<16 x i8> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v16i8 vsat_b(v16i8 _1) { return __builtin_lsx_vsat_b(_1, 1); }
+// CHECK-LABEL: @vsat_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsat.h(<8 x i16> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v8i16 vsat_h(v8i16 _1) { return __builtin_lsx_vsat_h(_1, 1); }
+// CHECK-LABEL: @vsat_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsat.w(<4 x i32> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v4i32 vsat_w(v4i32 _1) { return __builtin_lsx_vsat_w(_1, 1); }
+// CHECK-LABEL: @vsat_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsat.d(<2 x i64> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v2i64 vsat_d(v2i64 _1) { return __builtin_lsx_vsat_d(_1, 1); }
+// CHECK-LABEL: @vsat_bu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsat.bu(<16 x i8> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v16u8 vsat_bu(v16u8 _1) { return __builtin_lsx_vsat_bu(_1, 1); }
+// CHECK-LABEL: @vsat_hu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsat.hu(<8 x i16> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v8u16 vsat_hu(v8u16 _1) { return __builtin_lsx_vsat_hu(_1, 1); }
+// CHECK-LABEL: @vsat_wu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsat.wu(<4 x i32> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v4u32 vsat_wu(v4u32 _1) { return __builtin_lsx_vsat_wu(_1, 1); }
+// CHECK-LABEL: @vsat_du(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsat.du(<2 x i64> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v2u64 vsat_du(v2u64 _1) { return __builtin_lsx_vsat_du(_1, 1); }
+// CHECK-LABEL: @vadda_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vadda.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v16i8 vadda_b(v16i8 _1, v16i8 _2) {
+  return __builtin_lsx_vadda_b(_1, _2);
+}
+// CHECK-LABEL: @vadda_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vadda.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v8i16 vadda_h(v8i16 _1, v8i16 _2) {
+  return __builtin_lsx_vadda_h(_1, _2);
+}
+// CHECK-LABEL: @vadda_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vadda.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v4i32 vadda_w(v4i32 _1, v4i32 _2) {
+  return __builtin_lsx_vadda_w(_1, _2);
+}
+// CHECK-LABEL: @vadda_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vadda.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v2i64 vadda_d(v2i64 _1, v2i64 _2) {
+  return __builtin_lsx_vadda_d(_1, _2);
+}
+// CHECK-LABEL: @vsadd_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsadd.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v16i8 vsadd_b(v16i8 _1, v16i8 _2) {
+  return __builtin_lsx_vsadd_b(_1, _2);
+}
+// CHECK-LABEL: @vsadd_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsadd.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v8i16 vsadd_h(v8i16 _1, v8i16 _2) {
+  return __builtin_lsx_vsadd_h(_1, _2);
+}
+// CHECK-LABEL: @vsadd_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsadd.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v4i32 vsadd_w(v4i32 _1, v4i32 _2) {
+  return __builtin_lsx_vsadd_w(_1, _2);
+}
+// CHECK-LABEL: @vsadd_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsadd.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v2i64 vsadd_d(v2i64 _1, v2i64 _2) {
+  return __builtin_lsx_vsadd_d(_1, _2);
+}
+// CHECK-LABEL: @vsadd_bu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsadd.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v16u8 vsadd_bu(v16u8 _1, v16u8 _2) {
+  return __builtin_lsx_vsadd_bu(_1, _2);
+}
+// CHECK-LABEL: @vsadd_hu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsadd.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v8u16 vsadd_hu(v8u16 _1, v8u16 _2) {
+  return __builtin_lsx_vsadd_hu(_1, _2);
+}
+// CHECK-LABEL: @vsadd_wu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsadd.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v4u32 vsadd_wu(v4u32 _1, v4u32 _2) {
+  return __builtin_lsx_vsadd_wu(_1, _2);
+}
+// CHECK-LABEL: @vsadd_du(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsadd.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v2u64 vsadd_du(v2u64 _1, v2u64 _2) {
+  return __builtin_lsx_vsadd_du(_1, _2);
+}
+// CHECK-LABEL: @vavg_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavg.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v16i8 vavg_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vavg_b(_1, _2); }
+// CHECK-LABEL: @vavg_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavg.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v8i16 vavg_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vavg_h(_1, _2); }
+// CHECK-LABEL: @vavg_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavg.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v4i32 vavg_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vavg_w(_1, _2); }
+// CHECK-LABEL: @vavg_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavg.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v2i64 vavg_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vavg_d(_1, _2); }
+// CHECK-LABEL: @vavg_bu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavg.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v16u8 vavg_bu(v16u8 _1, v16u8 _2) {
+  return __builtin_lsx_vavg_bu(_1, _2);
+}
+// CHECK-LABEL: @vavg_hu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavg.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v8u16 vavg_hu(v8u16 _1, v8u16 _2) {
+  return __builtin_lsx_vavg_hu(_1, _2);
+}
+// CHECK-LABEL: @vavg_wu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavg.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v4u32 vavg_wu(v4u32 _1, v4u32 _2) {
+  return __builtin_lsx_vavg_wu(_1, _2);
+}
+// CHECK-LABEL: @vavg_du(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavg.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v2u64 vavg_du(v2u64 _1, v2u64 _2) {
+  return __builtin_lsx_vavg_du(_1, _2);
+}
+// CHECK-LABEL: @vavgr_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavgr.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v16i8 vavgr_b(v16i8 _1, v16i8 _2) {
+  return __builtin_lsx_vavgr_b(_1, _2);
+}
+// CHECK-LABEL: @vavgr_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavgr.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v8i16 vavgr_h(v8i16 _1, v8i16 _2) {
+  return __builtin_lsx_vavgr_h(_1, _2);
+}
+// CHECK-LABEL: @vavgr_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavgr.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v4i32 vavgr_w(v4i32 _1, v4i32 _2) {
+  return __builtin_lsx_vavgr_w(_1, _2);
+}
+// CHECK-LABEL: @vavgr_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavgr.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v2i64 vavgr_d(v2i64 _1, v2i64 _2) {
+  return __builtin_lsx_vavgr_d(_1, _2);
+}
+// CHECK-LABEL: @vavgr_bu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vavgr.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v16u8 vavgr_bu(v16u8 _1, v16u8 _2) {
+  return __builtin_lsx_vavgr_bu(_1, _2);
+}
+// CHECK-LABEL: @vavgr_hu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vavgr.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v8u16 vavgr_hu(v8u16 _1, v8u16 _2) {
+  return __builtin_lsx_vavgr_hu(_1, _2);
+}
+// CHECK-LABEL: @vavgr_wu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vavgr.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v4u32 vavgr_wu(v4u32 _1, v4u32 _2) {
+  return __builtin_lsx_vavgr_wu(_1, _2);
+}
+// CHECK-LABEL: @vavgr_du(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vavgr.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v2u64 vavgr_du(v2u64 _1, v2u64 _2) {
+  return __builtin_lsx_vavgr_du(_1, _2);
+}
+// CHECK-LABEL: @vssub_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssub.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v16i8 vssub_b(v16i8 _1, v16i8 _2) {
+  return __builtin_lsx_vssub_b(_1, _2);
+}
+// CHECK-LABEL: @vssub_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssub.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v8i16 vssub_h(v8i16 _1, v8i16 _2) {
+  return __builtin_lsx_vssub_h(_1, _2);
+}
+// CHECK-LABEL: @vssub_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssub.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v4i32 vssub_w(v4i32 _1, v4i32 _2) {
+  return __builtin_lsx_vssub_w(_1, _2);
+}
+// CHECK-LABEL: @vssub_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssub.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v2i64 vssub_d(v2i64 _1, v2i64 _2) {
+  return __builtin_lsx_vssub_d(_1, _2);
+}
+// CHECK-LABEL: @vssub_bu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssub.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v16u8 vssub_bu(v16u8 _1, v16u8 _2) {
+  return __builtin_lsx_vssub_bu(_1, _2);
+}
+// CHECK-LABEL: @vssub_hu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssub.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v8u16 vssub_hu(v8u16 _1, v8u16 _2) {
+  return __builtin_lsx_vssub_hu(_1, _2);
+}
+// CHECK-LABEL: @vssub_wu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssub.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v4u32 vssub_wu(v4u32 _1, v4u32 _2) {
+  return __builtin_lsx_vssub_wu(_1, _2);
+}
+// CHECK-LABEL: @vssub_du(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssub.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v2u64 vssub_du(v2u64 _1, v2u64 _2) {
+  return __builtin_lsx_vssub_du(_1, _2);
+}
+// CHECK-LABEL: @vabsd_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vabsd.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v16i8 vabsd_b(v16i8 _1, v16i8 _2) {
+  return __builtin_lsx_vabsd_b(_1, _2);
+}
+// CHECK-LABEL: @vabsd_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vabsd.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v8i16 vabsd_h(v8i16 _1, v8i16 _2) {
+  return __builtin_lsx_vabsd_h(_1, _2);
+}
+// CHECK-LABEL: @vabsd_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vabsd.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v4i32 vabsd_w(v4i32 _1, v4i32 _2) {
+  return __builtin_lsx_vabsd_w(_1, _2);
+}
+// CHECK-LABEL: @vabsd_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vabsd.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v2i64 vabsd_d(v2i64 _1, v2i64 _2) {
+  return __builtin_lsx_vabsd_d(_1, _2);
+}
+// CHECK-LABEL: @vabsd_bu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vabsd.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v16u8 vabsd_bu(v16u8 _1, v16u8 _2) {
+  return __builtin_lsx_vabsd_bu(_1, _2);
+}
+// CHECK-LABEL: @vabsd_hu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vabsd.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v8u16 vabsd_hu(v8u16 _1, v8u16 _2) {
+  return __builtin_lsx_vabsd_hu(_1, _2);
+}
+// CHECK-LABEL: @vabsd_wu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vabsd.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v4u32 vabsd_wu(v4u32 _1, v4u32 _2) {
+  return __builtin_lsx_vabsd_wu(_1, _2);
+}
+// CHECK-LABEL: @vabsd_du(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vabsd.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v2u64 vabsd_du(v2u64 _1, v2u64 _2) {
+  return __builtin_lsx_vabsd_du(_1, _2);
+}
+// CHECK-LABEL: @vmul_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmul.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v16i8 vmul_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vmul_b(_1, _2); }
+// CHECK-LABEL: @vmul_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmul.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v8i16 vmul_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vmul_h(_1, _2); }
+// CHECK-LABEL: @vmul_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmul.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v4i32 vmul_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vmul_w(_1, _2); }
+// CHECK-LABEL: @vmul_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmul.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v2i64 vmul_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vmul_d(_1, _2); }
+// CHECK-LABEL: @vmadd_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmadd.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
+// CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to i128
+// CHECK-NEXT:    ret i128 [[TMP4]]
+//
+v16i8 vmadd_b(v16i8 _1, v16i8 _2, v16i8 _3) {
+  return __builtin_lsx_vmadd_b(_1, _2, _3);
+}
+// CHECK-LABEL: @vmadd_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmadd.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
+// CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
+// CHECK-NEXT:    ret i128 [[TMP4]]
+//
+v8i16 vmadd_h(v8i16 _1, v8i16 _2, v8i16 _3) {
+  return __builtin_lsx_vmadd_h(_1, _2, _3);
+}
+// CHECK-LABEL: @vmadd_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmadd.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
+// CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
+// CHECK-NEXT:    ret i128 [[TMP4]]
+//
+v4i32 vmadd_w(v4i32 _1, v4i32 _2, v4i32 _3) {
+  return __builtin_lsx_vmadd_w(_1, _2, _3);
+}
+// CHECK-LABEL: @vmadd_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmadd.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
+// CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
+// CHECK-NEXT:    ret i128 [[TMP4]]
+//
+v2i64 vmadd_d(v2i64 _1, v2i64 _2, v2i64 _3) {
+  return __builtin_lsx_vmadd_d(_1, _2, _3);
+}
+// CHECK-LABEL: @vmsub_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmsub.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
+// CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to i128
+// CHECK-NEXT:    ret i128 [[TMP4]]
+//
+v16i8 vmsub_b(v16i8 _1, v16i8 _2, v16i8 _3) {
+  return __builtin_lsx_vmsub_b(_1, _2, _3);
+}
+// CHECK-LABEL: @vmsub_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmsub.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
+// CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
+// CHECK-NEXT:    ret i128 [[TMP4]]
+//
+v8i16 vmsub_h(v8i16 _1, v8i16 _2, v8i16 _3) {
+  return __builtin_lsx_vmsub_h(_1, _2, _3);
+}
+// CHECK-LABEL: @vmsub_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmsub.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
+// CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
+// CHECK-NEXT:    ret i128 [[TMP4]]
+//
+v4i32 vmsub_w(v4i32 _1, v4i32 _2, v4i32 _3) {
+  return __builtin_lsx_vmsub_w(_1, _2, _3);
+}
+// CHECK-LABEL: @vmsub_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmsub.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
+// CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
+// CHECK-NEXT:    ret i128 [[TMP4]]
+//
+v2i64 vmsub_d(v2i64 _1, v2i64 _2, v2i64 _3) {
+  return __builtin_lsx_vmsub_d(_1, _2, _3);
+}
+// CHECK-LABEL: @vdiv_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vdiv.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v16i8 vdiv_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vdiv_b(_1, _2); }
+// CHECK-LABEL: @vdiv_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vdiv.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v8i16 vdiv_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vdiv_h(_1, _2); }
+// CHECK-LABEL: @vdiv_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vdiv.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v4i32 vdiv_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vdiv_w(_1, _2); }
+// CHECK-LABEL: @vdiv_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vdiv.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v2i64 vdiv_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vdiv_d(_1, _2); }
+// CHECK-LABEL: @vdiv_bu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vdiv.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v16u8 vdiv_bu(v16u8 _1, v16u8 _2) {
+  return __builtin_lsx_vdiv_bu(_1, _2);
+}
+// CHECK-LABEL: @vdiv_hu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vdiv.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v8u16 vdiv_hu(v8u16 _1, v8u16 _2) {
+  return __builtin_lsx_vdiv_hu(_1, _2);
+}
+// CHECK-LABEL: @vdiv_wu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vdiv.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v4u32 vdiv_wu(v4u32 _1, v4u32 _2) {
+  return __builtin_lsx_vdiv_wu(_1, _2);
+}
+// CHECK-LABEL: @vdiv_du(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vdiv.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v2u64 vdiv_du(v2u64 _1, v2u64 _2) {
+  return __builtin_lsx_vdiv_du(_1, _2);
+}
+// CHECK-LABEL: @vhaddw_h_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhaddw.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v8i16 vhaddw_h_b(v16i8 _1, v16i8 _2) {
+  return __builtin_lsx_vhaddw_h_b(_1, _2);
+}
+// CHECK-LABEL: @vhaddw_w_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhaddw.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v4i32 vhaddw_w_h(v8i16 _1, v8i16 _2) {
+  return __builtin_lsx_vhaddw_w_h(_1, _2);
+}
+// CHECK-LABEL: @vhaddw_d_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v2i64 vhaddw_d_w(v4i32 _1, v4i32 _2) {
+  return __builtin_lsx_vhaddw_d_w(_1, _2);
+}
+// CHECK-LABEL: @vhaddw_hu_bu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhaddw.hu.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v8u16 vhaddw_hu_bu(v16u8 _1, v16u8 _2) {
+  return __builtin_lsx_vhaddw_hu_bu(_1, _2);
+}
+// CHECK-LABEL: @vhaddw_wu_hu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhaddw.wu.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v4u32 vhaddw_wu_hu(v8u16 _1, v8u16 _2) {
+  return __builtin_lsx_vhaddw_wu_hu(_1, _2);
+}
+// CHECK-LABEL: @vhaddw_du_wu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.du.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v2u64 vhaddw_du_wu(v4u32 _1, v4u32 _2) {
+  return __builtin_lsx_vhaddw_du_wu(_1, _2);
+}
+// CHECK-LABEL: @vhsubw_h_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhsubw.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v8i16 vhsubw_h_b(v16i8 _1, v16i8 _2) {
+  return __builtin_lsx_vhsubw_h_b(_1, _2);
+}
+// CHECK-LABEL: @vhsubw_w_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhsubw.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v4i32 vhsubw_w_h(v8i16 _1, v8i16 _2) {
+  return __builtin_lsx_vhsubw_w_h(_1, _2);
+}
+// CHECK-LABEL: @vhsubw_d_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v2i64 vhsubw_d_w(v4i32 _1, v4i32 _2) {
+  return __builtin_lsx_vhsubw_d_w(_1, _2);
+}
+// CHECK-LABEL: @vhsubw_hu_bu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vhsubw.hu.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v8i16 vhsubw_hu_bu(v16u8 _1, v16u8 _2) {
+  return __builtin_lsx_vhsubw_hu_bu(_1, _2);
+}
+// CHECK-LABEL: @vhsubw_wu_hu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vhsubw.wu.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v4i32 vhsubw_wu_hu(v8u16 _1, v8u16 _2) {
+  return __builtin_lsx_vhsubw_wu_hu(_1, _2);
+}
+// CHECK-LABEL: @vhsubw_du_wu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.du.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v2i64 vhsubw_du_wu(v4u32 _1, v4u32 _2) {
+  return __builtin_lsx_vhsubw_du_wu(_1, _2);
+}
+// CHECK-LABEL: @vmod_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmod.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v16i8 vmod_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vmod_b(_1, _2); }
+// CHECK-LABEL: @vmod_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmod.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v8i16 vmod_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vmod_h(_1, _2); }
+// CHECK-LABEL: @vmod_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmod.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v4i32 vmod_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vmod_w(_1, _2); }
+// CHECK-LABEL: @vmod_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmod.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v2i64 vmod_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vmod_d(_1, _2); }
+// CHECK-LABEL: @vmod_bu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmod.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v16u8 vmod_bu(v16u8 _1, v16u8 _2) {
+  return __builtin_lsx_vmod_bu(_1, _2);
+}
+// CHECK-LABEL: @vmod_hu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmod.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v8u16 vmod_hu(v8u16 _1, v8u16 _2) {
+  return __builtin_lsx_vmod_hu(_1, _2);
+}
+// CHECK-LABEL: @vmod_wu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmod.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v4u32 vmod_wu(v4u32 _1, v4u32 _2) {
+  return __builtin_lsx_vmod_wu(_1, _2);
+}
+// CHECK-LABEL: @vmod_du(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmod.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v2u64 vmod_du(v2u64 _1, v2u64 _2) {
+  return __builtin_lsx_vmod_du(_1, _2);
+}
+// CHECK-LABEL: @vreplve_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vreplve.b(<16 x i8> [[TMP0]], i32 [[_2:%.*]])
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v16i8 vreplve_b(v16i8 _1, int _2) {
+  return __builtin_lsx_vreplve_b(_1, _2);
+}
+// CHECK-LABEL: @vreplve_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vreplve.h(<8 x i16> [[TMP0]], i32 [[_2:%.*]])
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v8i16 vreplve_h(v8i16 _1, int _2) {
+  return __builtin_lsx_vreplve_h(_1, _2);
+}
+// CHECK-LABEL: @vreplve_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vreplve.w(<4 x i32> [[TMP0]], i32 [[_2:%.*]])
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v4i32 vreplve_w(v4i32 _1, int _2) {
+  return __builtin_lsx_vreplve_w(_1, _2);
+}
+// CHECK-LABEL: @vreplve_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vreplve.d(<2 x i64> [[TMP0]], i32 [[_2:%.*]])
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v2i64 vreplve_d(v2i64 _1, int _2) {
+  return __builtin_lsx_vreplve_d(_1, _2);
+}
+// CHECK-LABEL: @vreplvei_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vreplvei.b(<16 x i8> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v16i8 vreplvei_b(v16i8 _1) { return __builtin_lsx_vreplvei_b(_1, 1); }
+// CHECK-LABEL: @vreplvei_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vreplvei.h(<8 x i16> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v8i16 vreplvei_h(v8i16 _1) { return __builtin_lsx_vreplvei_h(_1, 1); }
+// CHECK-LABEL: @vreplvei_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vreplvei.w(<4 x i32> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v4i32 vreplvei_w(v4i32 _1) { return __builtin_lsx_vreplvei_w(_1, 1); }
+// CHECK-LABEL: @vreplvei_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vreplvei.d(<2 x i64> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v2i64 vreplvei_d(v2i64 _1) { return __builtin_lsx_vreplvei_d(_1, 1); }
+// CHECK-LABEL: @vpickev_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpickev.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v16i8 vpickev_b(v16i8 _1, v16i8 _2) {
+  return __builtin_lsx_vpickev_b(_1, _2);
+}
+// CHECK-LABEL: @vpickev_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpickev.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v8i16 vpickev_h(v8i16 _1, v8i16 _2) {
+  return __builtin_lsx_vpickev_h(_1, _2);
+}
+// CHECK-LABEL: @vpickev_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpickev.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v4i32 vpickev_w(v4i32 _1, v4i32 _2) {
+  return __builtin_lsx_vpickev_w(_1, _2);
+}
+// CHECK-LABEL: @vpickev_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpickev.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v2i64 vpickev_d(v2i64 _1, v2i64 _2) {
+  return __builtin_lsx_vpickev_d(_1, _2);
+}
+// CHECK-LABEL: @vpickod_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpickod.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v16i8 vpickod_b(v16i8 _1, v16i8 _2) {
+  return __builtin_lsx_vpickod_b(_1, _2);
+}
+// CHECK-LABEL: @vpickod_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpickod.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v8i16 vpickod_h(v8i16 _1, v8i16 _2) {
+  return __builtin_lsx_vpickod_h(_1, _2);
+}
+// CHECK-LABEL: @vpickod_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpickod.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v4i32 vpickod_w(v4i32 _1, v4i32 _2) {
+  return __builtin_lsx_vpickod_w(_1, _2);
+}
+// CHECK-LABEL: @vpickod_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpickod.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v2i64 vpickod_d(v2i64 _1, v2i64 _2) {
+  return __builtin_lsx_vpickod_d(_1, _2);
+}
+// CHECK-LABEL: @vilvh_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vilvh.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v16i8 vilvh_b(v16i8 _1, v16i8 _2) {
+  return __builtin_lsx_vilvh_b(_1, _2);
+}
+// CHECK-LABEL: @vilvh_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vilvh.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v8i16 vilvh_h(v8i16 _1, v8i16 _2) {
+  return __builtin_lsx_vilvh_h(_1, _2);
+}
+// CHECK-LABEL: @vilvh_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vilvh.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v4i32 vilvh_w(v4i32 _1, v4i32 _2) {
+  return __builtin_lsx_vilvh_w(_1, _2);
+}
+// CHECK-LABEL: @vilvh_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vilvh.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v2i64 vilvh_d(v2i64 _1, v2i64 _2) {
+  return __builtin_lsx_vilvh_d(_1, _2);
+}
+// CHECK-LABEL: @vilvl_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vilvl.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v16i8 vilvl_b(v16i8 _1, v16i8 _2) {
+  return __builtin_lsx_vilvl_b(_1, _2);
+}
+// CHECK-LABEL: @vilvl_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vilvl.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v8i16 vilvl_h(v8i16 _1, v8i16 _2) {
+  return __builtin_lsx_vilvl_h(_1, _2);
+}
+// CHECK-LABEL: @vilvl_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vilvl.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v4i32 vilvl_w(v4i32 _1, v4i32 _2) {
+  return __builtin_lsx_vilvl_w(_1, _2);
+}
+// CHECK-LABEL: @vilvl_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vilvl.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v2i64 vilvl_d(v2i64 _1, v2i64 _2) {
+  return __builtin_lsx_vilvl_d(_1, _2);
+}
+// CHECK-LABEL: @vpackev_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpackev.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v16i8 vpackev_b(v16i8 _1, v16i8 _2) {
+  return __builtin_lsx_vpackev_b(_1, _2);
+}
+// CHECK-LABEL: @vpackev_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpackev.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v8i16 vpackev_h(v8i16 _1, v8i16 _2) {
+  return __builtin_lsx_vpackev_h(_1, _2);
+}
+// CHECK-LABEL: @vpackev_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpackev.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v4i32 vpackev_w(v4i32 _1, v4i32 _2) {
+  return __builtin_lsx_vpackev_w(_1, _2);
+}
+// CHECK-LABEL: @vpackev_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpackev.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v2i64 vpackev_d(v2i64 _1, v2i64 _2) {
+  return __builtin_lsx_vpackev_d(_1, _2);
+}
+// CHECK-LABEL: @vpackod_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpackod.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v16i8 vpackod_b(v16i8 _1, v16i8 _2) {
+  return __builtin_lsx_vpackod_b(_1, _2);
+}
+// CHECK-LABEL: @vpackod_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpackod.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v8i16 vpackod_h(v8i16 _1, v8i16 _2) {
+  return __builtin_lsx_vpackod_h(_1, _2);
+}
+// CHECK-LABEL: @vpackod_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpackod.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v4i32 vpackod_w(v4i32 _1, v4i32 _2) {
+  return __builtin_lsx_vpackod_w(_1, _2);
+}
+// CHECK-LABEL: @vpackod_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vpackod.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v2i64 vpackod_d(v2i64 _1, v2i64 _2) {
+  return __builtin_lsx_vpackod_d(_1, _2);
+}
+// CHECK-LABEL: @vshuf_h(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vshuf.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
+// CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
+// CHECK-NEXT:    ret i128 [[TMP4]]
+//
+v8i16 vshuf_h(v8i16 _1, v8i16 _2, v8i16 _3) {
+  return __builtin_lsx_vshuf_h(_1, _2, _3);
+}
+// CHECK-LABEL: @vshuf_w(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vshuf.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
+// CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
+// CHECK-NEXT:    ret i128 [[TMP4]]
+//
+v4i32 vshuf_w(v4i32 _1, v4i32 _2, v4i32 _3) {
+  return __builtin_lsx_vshuf_w(_1, _2, _3);
+}
+// CHECK-LABEL: @vshuf_d(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vshuf.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
+// CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
+// CHECK-NEXT:    ret i128 [[TMP4]]
+//
+v2i64 vshuf_d(v2i64 _1, v2i64 _2, v2i64 _3) {
+  return __builtin_lsx_vshuf_d(_1, _2, _3);
+}
+// CHECK-LABEL: @vand_v(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vand.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v16u8 vand_v(v16u8 _1, v16u8 _2) { return __builtin_lsx_vand_v(_1, _2); }
+// CHECK-LABEL: @vandi_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vandi.b(<16 x i8> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v16u8 vandi_b(v16u8 _1) { return __builtin_lsx_vandi_b(_1, 1); }
+// CHECK-LABEL: @vor_v(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vor.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v16u8 vor_v(v16u8 _1, v16u8 _2) { return __builtin_lsx_vor_v(_1, _2); }
+// CHECK-LABEL: @vori_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vori.b(<16 x i8> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v16u8 vori_b(v16u8 _1) { return __builtin_lsx_vori_b(_1, 1); }
+// CHECK-LABEL: @vnor_v(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vnor.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v16u8 vnor_v(v16u8 _1, v16u8 _2) { return __builtin_lsx_vnor_v(_1, _2); }
+// CHECK-LABEL: @vnori_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vnori.b(<16 x i8> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v16u8 vnori_b(v16u8 _1) { return __builtin_lsx_vnori_b(_1, 1); }
+// CHECK-LABEL: @vxor_v(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vxor.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v16u8 vxor_v(v16u8 _1, v16u8 _2) { return __builtin_lsx_vxor_v(_1, _2); }
+// CHECK-LABEL: @vxori_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vxori.b(<16 x i8> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v16u8 vxori_b(v16u8 _1) { return __builtin_lsx_vxori_b(_1, 1); }
+// CHECK-LABEL: @vbitsel_v(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP3:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitsel.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
+// CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to i128
+// CHECK-NEXT:    ret i128 [[TMP4]]
+//
+v16u8 vbitsel_v(v16u8 _1, v16u8 _2, v16u8 _3) {
+  return __builtin_lsx_vbitsel_v(_1, _2, _3);
+}
+// CHECK-LABEL: @vbitseli_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbitseli.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT:    ret i128 [[TMP3]]
+//
+v16u8 vbitseli_b(v16u8 _1, v16u8 _2) {
+  return __builtin_lsx_vbitseli_b(_1, _2, 1);
+}
+// CHECK-LABEL: @vshuf4i_b(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vshuf4i.b(<16 x i8> [[TMP0]], i32 1)
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
+// CHECK-NEXT:    ret i128 [[TMP2]]
+//
+v16i8 vshuf4i_b(v16i8 _1) { return __builtin_lsx_vshuf4i_b(_1, 1); } +// CHECK-LABEL: @vshuf4i_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vshuf4i.h(<8 x i16> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v8i16 vshuf4i_h(v8i16 _1) { return __builtin_lsx_vshuf4i_h(_1, 1); } +// CHECK-LABEL: @vshuf4i_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vshuf4i.w(<4 x i32> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4i32 vshuf4i_w(v4i32 _1) { return __builtin_lsx_vshuf4i_w(_1, 1); } +// CHECK-LABEL: @vreplgr2vr_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vreplgr2vr.b(i32 [[_1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to i128 +// CHECK-NEXT: ret i128 [[TMP1]] +// +v16i8 vreplgr2vr_b(int _1) { return __builtin_lsx_vreplgr2vr_b(_1); } +// CHECK-LABEL: @vreplgr2vr_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vreplgr2vr.h(i32 [[_1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to i128 +// CHECK-NEXT: ret i128 [[TMP1]] +// +v8i16 vreplgr2vr_h(int _1) { return __builtin_lsx_vreplgr2vr_h(_1); } +// CHECK-LABEL: @vreplgr2vr_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vreplgr2vr.w(i32 [[_1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to i128 +// CHECK-NEXT: ret i128 [[TMP1]] +// +v4i32 vreplgr2vr_w(int _1) { return __builtin_lsx_vreplgr2vr_w(_1); } +// CHECK-LABEL: @vreplgr2vr_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vreplgr2vr.d(i64 [[_1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to i128 +// CHECK-NEXT: ret i128 [[TMP1]] +// +v2i64 vreplgr2vr_d(long _1) { return __builtin_lsx_vreplgr2vr_d(_1); } +// CHECK-LABEL: @vpcnt_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vpcnt.b(<16 x i8> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v16i8 vpcnt_b(v16i8 _1) { return __builtin_lsx_vpcnt_b(_1); } +// CHECK-LABEL: @vpcnt_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vpcnt.h(<8 x i16> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v8i16 vpcnt_h(v8i16 _1) { return __builtin_lsx_vpcnt_h(_1); } +// CHECK-LABEL: @vpcnt_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpcnt.w(<4 x i32> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4i32 vpcnt_w(v4i32 _1) { return __builtin_lsx_vpcnt_w(_1); } +// CHECK-LABEL: @vpcnt_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x 
i64> @llvm.loongarch.lsx.vpcnt.d(<2 x i64> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2i64 vpcnt_d(v2i64 _1) { return __builtin_lsx_vpcnt_d(_1); } +// CHECK-LABEL: @vclo_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vclo.b(<16 x i8> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v16i8 vclo_b(v16i8 _1) { return __builtin_lsx_vclo_b(_1); } +// CHECK-LABEL: @vclo_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vclo.h(<8 x i16> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v8i16 vclo_h(v8i16 _1) { return __builtin_lsx_vclo_h(_1); } +// CHECK-LABEL: @vclo_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vclo.w(<4 x i32> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4i32 vclo_w(v4i32 _1) { return __builtin_lsx_vclo_w(_1); } +// CHECK-LABEL: @vclo_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vclo.d(<2 x i64> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2i64 vclo_d(v2i64 _1) { return __builtin_lsx_vclo_d(_1); } +// CHECK-LABEL: @vclz_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vclz.b(<16 x i8> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v16i8 vclz_b(v16i8 _1) { return __builtin_lsx_vclz_b(_1); } +// CHECK-LABEL: @vclz_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vclz.h(<8 x i16> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v8i16 vclz_h(v8i16 _1) { return __builtin_lsx_vclz_h(_1); } +// CHECK-LABEL: @vclz_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vclz.w(<4 x i32> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4i32 vclz_w(v4i32 _1) { return __builtin_lsx_vclz_w(_1); } +// CHECK-LABEL: @vclz_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vclz.d(<2 x i64> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2i64 vclz_d(v2i64 _1) { return __builtin_lsx_vclz_d(_1); } +// CHECK-LABEL: @vpickve2gr_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.b(<16 x i8> [[TMP0]], i32 1) +// 
CHECK-NEXT: ret i32 [[TMP1]] +// +int vpickve2gr_b(v16i8 _1) { return __builtin_lsx_vpickve2gr_b(_1, 1); } +// CHECK-LABEL: @vpickve2gr_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.h(<8 x i16> [[TMP0]], i32 1) +// CHECK-NEXT: ret i32 [[TMP1]] +// +int vpickve2gr_h(v8i16 _1) { return __builtin_lsx_vpickve2gr_h(_1, 1); } +// CHECK-LABEL: @vpickve2gr_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.w(<4 x i32> [[TMP0]], i32 1) +// CHECK-NEXT: ret i32 [[TMP1]] +// +int vpickve2gr_w(v4i32 _1) { return __builtin_lsx_vpickve2gr_w(_1, 1); } +// CHECK-LABEL: @vpickve2gr_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.loongarch.lsx.vpickve2gr.d(<2 x i64> [[TMP0]], i32 1) +// CHECK-NEXT: ret i64 [[TMP1]] +// +long vpickve2gr_d(v2i64 _1) { return __builtin_lsx_vpickve2gr_d(_1, 1); } +// CHECK-LABEL: @vpickve2gr_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.bu(<16 x i8> [[TMP0]], i32 1) +// CHECK-NEXT: ret i32 [[TMP1]] +// +unsigned int vpickve2gr_bu(v16i8 _1) { + return __builtin_lsx_vpickve2gr_bu(_1, 1); +} +// CHECK-LABEL: @vpickve2gr_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.hu(<8 x i16> [[TMP0]], i32 1) +// CHECK-NEXT: ret i32 [[TMP1]] +// +unsigned int vpickve2gr_hu(v8i16 _1) { + return __builtin_lsx_vpickve2gr_hu(_1, 1); +} +// CHECK-LABEL: @vpickve2gr_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.vpickve2gr.wu(<4 x i32> [[TMP0]], i32 1) +// CHECK-NEXT: ret i32 [[TMP1]] +// +unsigned int vpickve2gr_wu(v4i32 _1) { + return __builtin_lsx_vpickve2gr_wu(_1, 1); +} +// CHECK-LABEL: @vpickve2gr_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.loongarch.lsx.vpickve2gr.du(<2 x i64> [[TMP0]], i32 1) +// CHECK-NEXT: ret i64 [[TMP1]] +// +unsigned long int vpickve2gr_du(v2i64 _1) { + return __builtin_lsx_vpickve2gr_du(_1, 1); +} +// CHECK-LABEL: @vinsgr2vr_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vinsgr2vr.b(<16 x i8> [[TMP0]], i32 1, i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v16i8 vinsgr2vr_b(v16i8 _1) { + return __builtin_lsx_vinsgr2vr_b(_1, 1, 1); +} +// CHECK-LABEL: @vinsgr2vr_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vinsgr2vr.h(<8 x i16> [[TMP0]], i32 1, i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v8i16 vinsgr2vr_h(v8i16 _1) { + return __builtin_lsx_vinsgr2vr_h(_1, 1, 1); +} +// CHECK-LABEL: @vinsgr2vr_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast 
i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vinsgr2vr.w(<4 x i32> [[TMP0]], i32 1, i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4i32 vinsgr2vr_w(v4i32 _1) { + return __builtin_lsx_vinsgr2vr_w(_1, 1, 1); +} +// CHECK-LABEL: @vinsgr2vr_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vinsgr2vr.d(<2 x i64> [[TMP0]], i64 1, i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2i64 vinsgr2vr_d(v2i64 _1) { + return __builtin_lsx_vinsgr2vr_d(_1, 1, 1); +} +// CHECK-LABEL: @vfadd_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfadd.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4f32 vfadd_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfadd_s(_1, _2); +} +// CHECK-LABEL: @vfadd_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfadd.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2f64 vfadd_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfadd_d(_1, _2); +} +// CHECK-LABEL: @vfsub_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfsub.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4f32 vfsub_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfsub_s(_1, _2); +} +// CHECK-LABEL: @vfsub_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfsub.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2f64 vfsub_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfsub_d(_1, _2); +} +// CHECK-LABEL: @vfmul_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmul.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4f32 vfmul_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfmul_s(_1, _2); +} +// CHECK-LABEL: @vfmul_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x 
double> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmul.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2f64 vfmul_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfmul_d(_1, _2); +} +// CHECK-LABEL: @vfdiv_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfdiv.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4f32 vfdiv_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfdiv_s(_1, _2); +} +// CHECK-LABEL: @vfdiv_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfdiv.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2f64 vfdiv_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfdiv_d(_1, _2); +} +// CHECK-LABEL: @vfcvt_h_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vfcvt.h.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vfcvt_h_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcvt_h_s(_1, _2); +} +// CHECK-LABEL: @vfcvt_s_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfcvt.s.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4f32 vfcvt_s_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcvt_s_d(_1, _2); +} +// CHECK-LABEL: @vfmin_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmin.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4f32 vfmin_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfmin_s(_1, _2); +} +// CHECK-LABEL: @vfmin_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmin.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2f64 vfmin_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfmin_d(_1, _2); +} +// CHECK-LABEL: @vfmina_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] 
to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmina.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4f32 vfmina_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfmina_s(_1, _2); +} +// CHECK-LABEL: @vfmina_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmina.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2f64 vfmina_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfmina_d(_1, _2); +} +// CHECK-LABEL: @vfmax_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmax.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4f32 vfmax_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfmax_s(_1, _2); +} +// CHECK-LABEL: @vfmax_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmax.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2f64 vfmax_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfmax_d(_1, _2); +} +// CHECK-LABEL: @vfmaxa_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmaxa.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4f32 vfmaxa_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfmaxa_s(_1, _2); +} +// CHECK-LABEL: @vfmaxa_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmaxa.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2f64 vfmaxa_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfmaxa_d(_1, _2); +} +// CHECK-LABEL: @vfclass_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfclass.s(<4 x float> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4i32 vfclass_s(v4f32 _1) { return __builtin_lsx_vfclass_s(_1); } +// CHECK-LABEL: @vfclass_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// 
CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfclass.d(<2 x double> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2i64 vfclass_d(v2f64 _1) { return __builtin_lsx_vfclass_d(_1); } +// CHECK-LABEL: @vfsqrt_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfsqrt.s(<4 x float> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4f32 vfsqrt_s(v4f32 _1) { return __builtin_lsx_vfsqrt_s(_1); } +// CHECK-LABEL: @vfsqrt_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfsqrt.d(<2 x double> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2f64 vfsqrt_d(v2f64 _1) { return __builtin_lsx_vfsqrt_d(_1); } +// CHECK-LABEL: @vfrecip_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrecip.s(<4 x float> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4f32 vfrecip_s(v4f32 _1) { return __builtin_lsx_vfrecip_s(_1); } +// CHECK-LABEL: @vfrecip_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrecip.d(<2 x double> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2f64 vfrecip_d(v2f64 _1) { return __builtin_lsx_vfrecip_d(_1); } +// CHECK-LABEL: @vfrint_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrint.s(<4 x float> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4f32 vfrint_s(v4f32 _1) { return __builtin_lsx_vfrint_s(_1); } +// CHECK-LABEL: @vfrint_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrint.d(<2 x double> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2f64 vfrint_d(v2f64 _1) { return __builtin_lsx_vfrint_d(_1); } +// CHECK-LABEL: @vfrsqrt_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrsqrt.s(<4 x float> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4f32 vfrsqrt_s(v4f32 _1) { return __builtin_lsx_vfrsqrt_s(_1); } +// CHECK-LABEL: @vfrsqrt_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrsqrt.d(<2 x double> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2f64 vfrsqrt_d(v2f64 _1) { return __builtin_lsx_vfrsqrt_d(_1); } +// CHECK-LABEL: @vflogb_s( 
+// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vflogb.s(<4 x float> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4f32 vflogb_s(v4f32 _1) { return __builtin_lsx_vflogb_s(_1); } +// CHECK-LABEL: @vflogb_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vflogb.d(<2 x double> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2f64 vflogb_d(v2f64 _1) { return __builtin_lsx_vflogb_d(_1); } +// CHECK-LABEL: @vfcvth_s_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfcvth.s.h(<8 x i16> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4f32 vfcvth_s_h(v8i16 _1) { return __builtin_lsx_vfcvth_s_h(_1); } +// CHECK-LABEL: @vfcvth_d_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfcvth.d.s(<4 x float> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2f64 vfcvth_d_s(v4f32 _1) { return __builtin_lsx_vfcvth_d_s(_1); } +// CHECK-LABEL: @vfcvtl_s_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfcvtl.s.h(<8 x i16> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4f32 vfcvtl_s_h(v8i16 _1) { return __builtin_lsx_vfcvtl_s_h(_1); } +// CHECK-LABEL: @vfcvtl_d_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfcvtl.d.s(<4 x float> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2f64 vfcvtl_d_s(v4f32 _1) { return __builtin_lsx_vfcvtl_d_s(_1); } +// CHECK-LABEL: @vftint_w_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftint.w.s(<4 x float> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4i32 vftint_w_s(v4f32 _1) { return __builtin_lsx_vftint_w_s(_1); } +// CHECK-LABEL: @vftint_l_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftint.l.d(<2 x double> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2i64 vftint_l_d(v2f64 _1) { return __builtin_lsx_vftint_l_d(_1); } +// CHECK-LABEL: @vftint_wu_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftint.wu.s(<4 x float> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +// 
CHECK-NEXT: ret i128 [[TMP2]] +// +v4u32 vftint_wu_s(v4f32 _1) { return __builtin_lsx_vftint_wu_s(_1); } +// CHECK-LABEL: @vftint_lu_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftint.lu.d(<2 x double> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2u64 vftint_lu_d(v2f64 _1) { return __builtin_lsx_vftint_lu_d(_1); } +// CHECK-LABEL: @vftintrz_w_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.w.s(<4 x float> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4i32 vftintrz_w_s(v4f32 _1) { return __builtin_lsx_vftintrz_w_s(_1); } +// CHECK-LABEL: @vftintrz_l_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrz.l.d(<2 x double> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2i64 vftintrz_l_d(v2f64 _1) { return __builtin_lsx_vftintrz_l_d(_1); } +// CHECK-LABEL: @vftintrz_wu_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.wu.s(<4 x float> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4u32 vftintrz_wu_s(v4f32 _1) { return __builtin_lsx_vftintrz_wu_s(_1); } +// CHECK-LABEL: @vftintrz_lu_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrz.lu.d(<2 x double> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2u64 vftintrz_lu_d(v2f64 _1) { return __builtin_lsx_vftintrz_lu_d(_1); } +// CHECK-LABEL: @vffint_s_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.w(<4 x i32> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4f32 vffint_s_w(v4i32 _1) { return __builtin_lsx_vffint_s_w(_1); } +// CHECK-LABEL: @vffint_d_l( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffint.d.l(<2 x i64> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2f64 vffint_d_l(v2i64 _1) { return __builtin_lsx_vffint_d_l(_1); } +// CHECK-LABEL: @vffint_s_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.wu(<4 x i32> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4f32 vffint_s_wu(v4u32 _1) { return __builtin_lsx_vffint_s_wu(_1); } +// CHECK-LABEL: @vffint_d_lu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// 
CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffint.d.lu(<2 x i64> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2f64 vffint_d_lu(v2u64 _1) { return __builtin_lsx_vffint_d_lu(_1); } +// CHECK-LABEL: @vandn_v( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vandn.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16u8 vandn_v(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vandn_v(_1, _2); +} +// CHECK-LABEL: @vneg_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vneg.b(<16 x i8> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v16i8 vneg_b(v16i8 _1) { return __builtin_lsx_vneg_b(_1); } +// CHECK-LABEL: @vneg_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vneg.h(<8 x i16> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v8i16 vneg_h(v8i16 _1) { return __builtin_lsx_vneg_h(_1); } +// CHECK-LABEL: @vneg_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vneg.w(<4 x i32> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4i32 vneg_w(v4i32 _1) { return __builtin_lsx_vneg_w(_1); } +// CHECK-LABEL: @vneg_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vneg.d(<2 x i64> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2i64 vneg_d(v2i64 _1) { return __builtin_lsx_vneg_d(_1); } +// CHECK-LABEL: @vmuh_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmuh.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vmuh_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vmuh_b(_1, _2); } +// CHECK-LABEL: @vmuh_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmuh.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vmuh_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vmuh_h(_1, _2); } +// CHECK-LABEL: @vmuh_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = 
tail call <4 x i32> @llvm.loongarch.lsx.vmuh.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vmuh_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vmuh_w(_1, _2); } +// CHECK-LABEL: @vmuh_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmuh.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vmuh_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vmuh_d(_1, _2); } +// CHECK-LABEL: @vmuh_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmuh.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16u8 vmuh_bu(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vmuh_bu(_1, _2); +} +// CHECK-LABEL: @vmuh_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmuh.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8u16 vmuh_hu(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vmuh_hu(_1, _2); +} +// CHECK-LABEL: @vmuh_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmuh.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4u32 vmuh_wu(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vmuh_wu(_1, _2); +} +// CHECK-LABEL: @vmuh_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmuh.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2u64 vmuh_du(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vmuh_du(_1, _2); +} +// CHECK-LABEL: @vsllwil_h_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsllwil.h.b(<16 x i8> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v8i16 vsllwil_h_b(v16i8 _1) { return __builtin_lsx_vsllwil_h_b(_1, 1); } +// CHECK-LABEL: @vsllwil_w_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsllwil.w.h(<8 x i16> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4i32 
vsllwil_w_h(v8i16 _1) { return __builtin_lsx_vsllwil_w_h(_1, 1); } +// CHECK-LABEL: @vsllwil_d_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsllwil.d.w(<4 x i32> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2i64 vsllwil_d_w(v4i32 _1) { return __builtin_lsx_vsllwil_d_w(_1, 1); } +// CHECK-LABEL: @vsllwil_hu_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsllwil.hu.bu(<16 x i8> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v8u16 vsllwil_hu_bu(v16u8 _1) { + return __builtin_lsx_vsllwil_hu_bu(_1, 1); +} +// CHECK-LABEL: @vsllwil_wu_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsllwil.wu.hu(<8 x i16> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4u32 vsllwil_wu_hu(v8u16 _1) { + return __builtin_lsx_vsllwil_wu_hu(_1, 1); +} +// CHECK-LABEL: @vsllwil_du_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsllwil.du.wu(<4 x i32> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2u64 vsllwil_du_wu(v4u32 _1) { + return __builtin_lsx_vsllwil_du_wu(_1, 1); +} +// CHECK-LABEL: @vsran_b_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsran.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vsran_b_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vsran_b_h(_1, _2); +} +// CHECK-LABEL: @vsran_h_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsran.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vsran_h_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vsran_h_w(_1, _2); +} +// CHECK-LABEL: @vsran_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsran.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vsran_w_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vsran_w_d(_1, _2); +} +// CHECK-LABEL: @vssran_b_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> 
@llvm.loongarch.lsx.vssran.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vssran_b_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vssran_b_h(_1, _2); +} +// CHECK-LABEL: @vssran_h_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssran.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vssran_h_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vssran_h_w(_1, _2); +} +// CHECK-LABEL: @vssran_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssran.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vssran_w_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vssran_w_d(_1, _2); +} +// CHECK-LABEL: @vssran_bu_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssran.bu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16u8 vssran_bu_h(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vssran_bu_h(_1, _2); +} +// CHECK-LABEL: @vssran_hu_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssran.hu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8u16 vssran_hu_w(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vssran_hu_w(_1, _2); +} +// CHECK-LABEL: @vssran_wu_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssran.wu.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4u32 vssran_wu_d(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vssran_wu_d(_1, _2); +} +// CHECK-LABEL: @vsrarn_b_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrarn.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vsrarn_b_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vsrarn_b_h(_1, _2); +} +// CHECK-LABEL: @vsrarn_h_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] 
to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrarn.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8i16 vsrarn_h_w(v4i32 _1, v4i32 _2) {
+  return __builtin_lsx_vsrarn_h_w(_1, _2);
+}
+// CHECK-LABEL: @vsrarn_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrarn.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vsrarn_w_d(v2i64 _1, v2i64 _2) {
+  return __builtin_lsx_vsrarn_w_d(_1, _2);
+}
+// CHECK-LABEL: @vssrarn_b_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarn.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v16i8 vssrarn_b_h(v8i16 _1, v8i16 _2) {
+  return __builtin_lsx_vssrarn_b_h(_1, _2);
+}
+// CHECK-LABEL: @vssrarn_h_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarn.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8i16 vssrarn_h_w(v4i32 _1, v4i32 _2) {
+  return __builtin_lsx_vssrarn_h_w(_1, _2);
+}
+// CHECK-LABEL: @vssrarn_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarn.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vssrarn_w_d(v2i64 _1, v2i64 _2) {
+  return __builtin_lsx_vssrarn_w_d(_1, _2);
+}
+// CHECK-LABEL: @vssrarn_bu_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarn.bu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v16u8 vssrarn_bu_h(v8u16 _1, v8u16 _2) {
+  return __builtin_lsx_vssrarn_bu_h(_1, _2);
+}
+// CHECK-LABEL: @vssrarn_hu_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarn.hu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8u16 vssrarn_hu_w(v4u32 _1, v4u32 _2) {
+  return __builtin_lsx_vssrarn_hu_w(_1, _2);
+}
+// CHECK-LABEL: @vssrarn_wu_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarn.wu.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4u32 vssrarn_wu_d(v2u64 _1, v2u64 _2) {
+  return __builtin_lsx_vssrarn_wu_d(_1, _2);
+}
+// CHECK-LABEL: @vsrln_b_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrln.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v16i8 vsrln_b_h(v8i16 _1, v8i16 _2) {
+  return __builtin_lsx_vsrln_b_h(_1, _2);
+}
+// CHECK-LABEL: @vsrln_h_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrln.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8i16 vsrln_h_w(v4i32 _1, v4i32 _2) {
+  return __builtin_lsx_vsrln_h_w(_1, _2);
+}
+// CHECK-LABEL: @vsrln_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrln.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vsrln_w_d(v2i64 _1, v2i64 _2) {
+  return __builtin_lsx_vsrln_w_d(_1, _2);
+}
+// CHECK-LABEL: @vssrln_bu_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrln.bu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v16u8 vssrln_bu_h(v8u16 _1, v8u16 _2) {
+  return __builtin_lsx_vssrln_bu_h(_1, _2);
+}
+// CHECK-LABEL: @vssrln_hu_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrln.hu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8u16 vssrln_hu_w(v4u32 _1, v4u32 _2) {
+  return __builtin_lsx_vssrln_hu_w(_1, _2);
+}
+// CHECK-LABEL: @vssrln_wu_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrln.wu.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4u32 vssrln_wu_d(v2u64 _1, v2u64 _2) {
+  return __builtin_lsx_vssrln_wu_d(_1, _2);
+}
+// CHECK-LABEL: @vsrlrn_b_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlrn.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v16i8 vsrlrn_b_h(v8i16 _1, v8i16 _2) {
+  return __builtin_lsx_vsrlrn_b_h(_1, _2);
+}
+// CHECK-LABEL: @vsrlrn_h_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlrn.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8i16 vsrlrn_h_w(v4i32 _1, v4i32 _2) {
+  return __builtin_lsx_vsrlrn_h_w(_1, _2);
+}
+// CHECK-LABEL: @vsrlrn_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlrn.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vsrlrn_w_d(v2i64 _1, v2i64 _2) {
+  return __builtin_lsx_vsrlrn_w_d(_1, _2);
+}
+// CHECK-LABEL: @vssrlrn_bu_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrn.bu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v16u8 vssrlrn_bu_h(v8u16 _1, v8u16 _2) {
+  return __builtin_lsx_vssrlrn_bu_h(_1, _2);
+}
+// CHECK-LABEL: @vssrlrn_hu_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrn.hu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8u16 vssrlrn_hu_w(v4u32 _1, v4u32 _2) {
+  return __builtin_lsx_vssrlrn_hu_w(_1, _2);
+}
+// CHECK-LABEL: @vssrlrn_wu_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrn.wu.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4u32 vssrlrn_wu_d(v2u64 _1, v2u64 _2) {
+  return __builtin_lsx_vssrlrn_wu_d(_1, _2);
+}
+// CHECK-LABEL: @vfrstpi_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vfrstpi.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v16i8 vfrstpi_b(v16i8 _1, v16i8 _2) {
+  return __builtin_lsx_vfrstpi_b(_1, _2, 1);
+}
+// CHECK-LABEL: @vfrstpi_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vfrstpi.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8i16 vfrstpi_h(v8i16 _1, v8i16 _2) {
+  return __builtin_lsx_vfrstpi_h(_1, _2, 1);
+}
+// CHECK-LABEL: @vfrstp_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vfrstp.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to i128
+// CHECK-NEXT: ret i128 [[TMP4]]
+//
+v16i8 vfrstp_b(v16i8 _1, v16i8 _2, v16i8 _3) {
+  return __builtin_lsx_vfrstp_b(_1, _2, _3);
+}
+// CHECK-LABEL: @vfrstp_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vfrstp.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
+// CHECK-NEXT: ret i128 [[TMP4]]
+//
+v8i16 vfrstp_h(v8i16 _1, v8i16 _2, v8i16 _3) {
+  return __builtin_lsx_vfrstp_h(_1, _2, _3);
+}
+// CHECK-LABEL: @vshuf4i_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vshuf4i.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vshuf4i_d(v2i64 _1, v2i64 _2) {
+  return __builtin_lsx_vshuf4i_d(_1, _2, 1);
+}
+// CHECK-LABEL: @vbsrl_v(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbsrl.v(<16 x i8> [[TMP0]], i32 1)
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v16i8 vbsrl_v(v16i8 _1) { return __builtin_lsx_vbsrl_v(_1, 1); }
+// CHECK-LABEL: @vbsll_v(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vbsll.v(<16 x i8> [[TMP0]], i32 1)
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v16i8 vbsll_v(v16i8 _1) { return __builtin_lsx_vbsll_v(_1, 1); }
+// CHECK-LABEL: @vextrins_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vextrins.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1)
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v16i8 vextrins_b(v16i8 _1, v16i8 _2) {
+  return __builtin_lsx_vextrins_b(_1, _2, 1);
+}
+// CHECK-LABEL: @vextrins_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vextrins.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1)
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8i16 vextrins_h(v8i16 _1, v8i16 _2) {
+  return __builtin_lsx_vextrins_h(_1, _2, 1);
+}
+// CHECK-LABEL: @vextrins_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vextrins.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1)
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vextrins_w(v4i32 _1, v4i32 _2) {
+  return __builtin_lsx_vextrins_w(_1, _2, 1);
+}
+// CHECK-LABEL: @vextrins_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vextrins.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1)
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vextrins_d(v2i64 _1, v2i64 _2) {
+  return __builtin_lsx_vextrins_d(_1, _2, 1);
+}
+// CHECK-LABEL: @vmskltz_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmskltz.b(<16 x i8> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v16i8 vmskltz_b(v16i8 _1) { return __builtin_lsx_vmskltz_b(_1); }
+// CHECK-LABEL: @vmskltz_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmskltz.h(<8 x i16> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v8i16 vmskltz_h(v8i16 _1) { return __builtin_lsx_vmskltz_h(_1); }
+// CHECK-LABEL: @vmskltz_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmskltz.w(<4 x i32> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v4i32 vmskltz_w(v4i32 _1) { return __builtin_lsx_vmskltz_w(_1); }
+// CHECK-LABEL: @vmskltz_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmskltz.d(<2 x i64> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2i64 vmskltz_d(v2i64 _1) { return __builtin_lsx_vmskltz_d(_1); }
+// CHECK-LABEL: @vsigncov_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsigncov.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v16i8 vsigncov_b(v16i8 _1, v16i8 _2) {
+  return __builtin_lsx_vsigncov_b(_1, _2);
+}
+// CHECK-LABEL: @vsigncov_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsigncov.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8i16 vsigncov_h(v8i16 _1, v8i16 _2) {
+  return __builtin_lsx_vsigncov_h(_1, _2);
+}
+// CHECK-LABEL: @vsigncov_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsigncov.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vsigncov_w(v4i32 _1, v4i32 _2) {
+  return __builtin_lsx_vsigncov_w(_1, _2);
+}
+// CHECK-LABEL: @vsigncov_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsigncov.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vsigncov_d(v2i64 _1, v2i64 _2) {
+  return __builtin_lsx_vsigncov_d(_1, _2);
+}
+// CHECK-LABEL: @vfmadd_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmadd.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]], <4 x float> [[TMP2]])
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to i128
+// CHECK-NEXT: ret i128 [[TMP4]]
+//
+v4f32 vfmadd_s(v4f32 _1, v4f32 _2, v4f32 _3) {
+  return __builtin_lsx_vfmadd_s(_1, _2, _3);
+}
+// CHECK-LABEL: @vfmadd_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmadd.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]], <2 x double> [[TMP2]])
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to i128
+// CHECK-NEXT: ret i128 [[TMP4]]
+//
+v2f64 vfmadd_d(v2f64 _1, v2f64 _2, v2f64 _3) {
+  return __builtin_lsx_vfmadd_d(_1, _2, _3);
+}
+// CHECK-LABEL: @vfmsub_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfmsub.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]], <4 x float> [[TMP2]])
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to i128
+// CHECK-NEXT: ret i128 [[TMP4]]
+//
+v4f32 vfmsub_s(v4f32 _1, v4f32 _2, v4f32 _3) {
+  return __builtin_lsx_vfmsub_s(_1, _2, _3);
+}
+// CHECK-LABEL: @vfmsub_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfmsub.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]], <2 x double> [[TMP2]])
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to i128
+// CHECK-NEXT: ret i128 [[TMP4]]
+//
+v2f64 vfmsub_d(v2f64 _1, v2f64 _2, v2f64 _3) {
+  return __builtin_lsx_vfmsub_d(_1, _2, _3);
+}
+// CHECK-LABEL: @vfnmadd_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfnmadd.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]], <4 x float> [[TMP2]])
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to i128
+// CHECK-NEXT: ret i128 [[TMP4]]
+//
+v4f32 vfnmadd_s(v4f32 _1, v4f32 _2, v4f32 _3) {
+  return __builtin_lsx_vfnmadd_s(_1, _2, _3);
+}
+// CHECK-LABEL: @vfnmadd_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfnmadd.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]], <2 x double> [[TMP2]])
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to i128
+// CHECK-NEXT: ret i128 [[TMP4]]
+//
+v2f64 vfnmadd_d(v2f64 _1, v2f64 _2, v2f64 _3) {
+  return __builtin_lsx_vfnmadd_d(_1, _2, _3);
+}
+// CHECK-LABEL: @vfnmsub_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfnmsub.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]], <4 x float> [[TMP2]])
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to i128
+// CHECK-NEXT: ret i128 [[TMP4]]
+//
+v4f32 vfnmsub_s(v4f32 _1, v4f32 _2, v4f32 _3) {
+  return __builtin_lsx_vfnmsub_s(_1, _2, _3);
+}
+// CHECK-LABEL: @vfnmsub_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfnmsub.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]], <2 x double> [[TMP2]])
+// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to i128
+// CHECK-NEXT: ret i128 [[TMP4]]
+//
+v2f64 vfnmsub_d(v2f64 _1, v2f64 _2, v2f64 _3) {
+  return __builtin_lsx_vfnmsub_d(_1, _2, _3);
+}
+// CHECK-LABEL: @vftintrne_w_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrne.w.s(<4 x float> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v4i32 vftintrne_w_s(v4f32 _1) { return __builtin_lsx_vftintrne_w_s(_1); }
+// CHECK-LABEL: @vftintrne_l_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrne.l.d(<2 x double> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2i64 vftintrne_l_d(v2f64 _1) { return __builtin_lsx_vftintrne_l_d(_1); }
+// CHECK-LABEL: @vftintrp_w_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrp.w.s(<4 x float> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v4i32 vftintrp_w_s(v4f32 _1) { return __builtin_lsx_vftintrp_w_s(_1); }
+// CHECK-LABEL: @vftintrp_l_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrp.l.d(<2 x double> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2i64 vftintrp_l_d(v2f64 _1) { return __builtin_lsx_vftintrp_l_d(_1); }
+// CHECK-LABEL: @vftintrm_w_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrm.w.s(<4 x float> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v4i32 vftintrm_w_s(v4f32 _1) { return __builtin_lsx_vftintrm_w_s(_1); }
+// CHECK-LABEL: @vftintrm_l_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrm.l.d(<2 x double> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2i64 vftintrm_l_d(v2f64 _1) { return __builtin_lsx_vftintrm_l_d(_1); }
+// CHECK-LABEL: @vftint_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftint.w.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vftint_w_d(v2f64 _1, v2f64 _2) {
+  return __builtin_lsx_vftint_w_d(_1, _2);
+}
+// CHECK-LABEL: @vffint_s_l(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.l(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4f32 vffint_s_l(v2i64 _1, v2i64 _2) {
+  return __builtin_lsx_vffint_s_l(_1, _2);
+}
+// CHECK-LABEL: @vftintrz_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.w.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vftintrz_w_d(v2f64 _1, v2f64 _2) {
+  return __builtin_lsx_vftintrz_w_d(_1, _2);
+}
+// CHECK-LABEL: @vftintrp_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrp.w.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vftintrp_w_d(v2f64 _1, v2f64 _2) {
+  return __builtin_lsx_vftintrp_w_d(_1, _2);
+}
+// CHECK-LABEL: @vftintrm_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrm.w.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vftintrm_w_d(v2f64 _1, v2f64 _2) {
+  return __builtin_lsx_vftintrm_w_d(_1, _2);
+}
+// CHECK-LABEL: @vftintrne_w_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vftintrne.w.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vftintrne_w_d(v2f64 _1, v2f64 _2) {
+  return __builtin_lsx_vftintrne_w_d(_1, _2);
+}
+// CHECK-LABEL: @vftintl_l_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintl.l.s(<4 x float> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2i64 vftintl_l_s(v4f32 _1) { return __builtin_lsx_vftintl_l_s(_1); }
+// CHECK-LABEL: @vftinth_l_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftinth.l.s(<4 x float> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2i64 vftinth_l_s(v4f32 _1) { return __builtin_lsx_vftinth_l_s(_1); }
+// CHECK-LABEL: @vffinth_d_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffinth.d.w(<4 x i32> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2f64 vffinth_d_w(v4i32 _1) { return __builtin_lsx_vffinth_d_w(_1); }
+// CHECK-LABEL: @vffintl_d_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vffintl.d.w(<4 x i32> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2f64 vffintl_d_w(v4i32 _1) { return __builtin_lsx_vffintl_d_w(_1); }
+// CHECK-LABEL: @vftintrzl_l_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrzl.l.s(<4 x float> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2i64 vftintrzl_l_s(v4f32 _1) { return __builtin_lsx_vftintrzl_l_s(_1); }
+// CHECK-LABEL: @vftintrzh_l_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrzh.l.s(<4 x float> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2i64 vftintrzh_l_s(v4f32 _1) { return __builtin_lsx_vftintrzh_l_s(_1); }
+// CHECK-LABEL: @vftintrpl_l_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrpl.l.s(<4 x float> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2i64 vftintrpl_l_s(v4f32 _1) { return __builtin_lsx_vftintrpl_l_s(_1); }
+// CHECK-LABEL: @vftintrph_l_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrph.l.s(<4 x float> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2i64 vftintrph_l_s(v4f32 _1) { return __builtin_lsx_vftintrph_l_s(_1); }
+// CHECK-LABEL: @vftintrml_l_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrml.l.s(<4 x float> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2i64 vftintrml_l_s(v4f32 _1) { return __builtin_lsx_vftintrml_l_s(_1); }
+// CHECK-LABEL: @vftintrmh_l_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrmh.l.s(<4 x float> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2i64 vftintrmh_l_s(v4f32 _1) { return __builtin_lsx_vftintrmh_l_s(_1); }
+// CHECK-LABEL: @vftintrnel_l_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrnel.l.s(<4 x float> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2i64 vftintrnel_l_s(v4f32 _1) {
+  return __builtin_lsx_vftintrnel_l_s(_1);
+}
+// CHECK-LABEL: @vftintrneh_l_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vftintrneh.l.s(<4 x float> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2i64 vftintrneh_l_s(v4f32 _1) {
+  return __builtin_lsx_vftintrneh_l_s(_1);
+}
+// CHECK-LABEL: @vfrintrne_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrne.s(<4 x float> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v4i32 vfrintrne_s(v4f32 _1) { return __builtin_lsx_vfrintrne_s(_1); }
+// CHECK-LABEL: @vfrintrne_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrne.d(<2 x double> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2i64 vfrintrne_d(v2f64 _1) { return __builtin_lsx_vfrintrne_d(_1); }
+// CHECK-LABEL: @vfrintrz_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrz.s(<4 x float> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v4i32 vfrintrz_s(v4f32 _1) { return __builtin_lsx_vfrintrz_s(_1); }
+// CHECK-LABEL: @vfrintrz_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrz.d(<2 x double> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2i64 vfrintrz_d(v2f64 _1) { return __builtin_lsx_vfrintrz_d(_1); }
+// CHECK-LABEL: @vfrintrp_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrp.s(<4 x float> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v4i32 vfrintrp_s(v4f32 _1) { return __builtin_lsx_vfrintrp_s(_1); }
+// CHECK-LABEL: @vfrintrp_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrp.d(<2 x double> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2i64 vfrintrp_d(v2f64 _1) { return __builtin_lsx_vfrintrp_d(_1); }
+// CHECK-LABEL: @vfrintrm_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lsx.vfrintrm.s(<4 x float> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v4i32 vfrintrm_s(v4f32 _1) { return __builtin_lsx_vfrintrm_s(_1); }
+// CHECK-LABEL: @vfrintrm_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lsx.vfrintrm.d(<2 x double> [[TMP0]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2i64 vfrintrm_d(v2f64 _1) { return __builtin_lsx_vfrintrm_d(_1); }
+// CHECK-LABEL: @vstelm_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.b(<16 x i8> [[TMP0]], ptr [[_2:%.*]], i32 1, i32 1)
+// CHECK-NEXT: ret void
+//
+void vstelm_b(v16i8 _1, void *_2) {
+  return __builtin_lsx_vstelm_b(_1, _2, 1, 1);
+}
+// CHECK-LABEL: @vstelm_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.h(<8 x i16> [[TMP0]], ptr [[_2:%.*]], i32 2, i32 1)
+// CHECK-NEXT: ret void
+//
+void vstelm_h(v8i16 _1, void *_2) {
+  return __builtin_lsx_vstelm_h(_1, _2, 2, 1);
+}
+// CHECK-LABEL: @vstelm_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.w(<4 x i32> [[TMP0]], ptr [[_2:%.*]], i32 4, i32 1)
+// CHECK-NEXT: ret void
+//
+void vstelm_w(v4i32 _1, void *_2) {
+  return __builtin_lsx_vstelm_w(_1, _2, 4, 1);
+}
+// CHECK-LABEL: @vstelm_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.d(<2 x i64> [[TMP0]], ptr [[_2:%.*]], i32 8, i32 1)
+// CHECK-NEXT: ret void
+//
+void vstelm_d(v2i64 _1, void *_2) {
+  return __builtin_lsx_vstelm_d(_1, _2, 8, 1);
+}
+// CHECK-LABEL: @vaddwev_d_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vaddwev_d_w(v4i32 _1, v4i32 _2) {
+  return __builtin_lsx_vaddwev_d_w(_1, _2);
+}
+// CHECK-LABEL: @vaddwev_w_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vaddwev_w_h(v8i16 _1, v8i16 _2) {
+  return __builtin_lsx_vaddwev_w_h(_1, _2);
+}
+// CHECK-LABEL: @vaddwev_h_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8i16 vaddwev_h_b(v16i8 _1, v16i8 _2) {
+  return __builtin_lsx_vaddwev_h_b(_1, _2);
+}
+// CHECK-LABEL: @vaddwod_d_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vaddwod_d_w(v4i32 _1, v4i32 _2) {
+  return __builtin_lsx_vaddwod_d_w(_1, _2);
+}
+// CHECK-LABEL: @vaddwod_w_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vaddwod_w_h(v8i16 _1, v8i16 _2) {
+  return __builtin_lsx_vaddwod_w_h(_1, _2);
+}
+// CHECK-LABEL: @vaddwod_h_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8i16 vaddwod_h_b(v16i8 _1, v16i8 _2) {
+  return __builtin_lsx_vaddwod_h_b(_1, _2);
+}
+// CHECK-LABEL: @vaddwev_d_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vaddwev_d_wu(v4u32 _1, v4u32 _2) {
+  return __builtin_lsx_vaddwev_d_wu(_1, _2);
+}
+// CHECK-LABEL: @vaddwev_w_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vaddwev_w_hu(v8u16 _1, v8u16 _2) {
+  return __builtin_lsx_vaddwev_w_hu(_1, _2);
+}
+// CHECK-LABEL: @vaddwev_h_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8i16 vaddwev_h_bu(v16u8 _1, v16u8 _2) {
+  return __builtin_lsx_vaddwev_h_bu(_1, _2);
+}
+// CHECK-LABEL: @vaddwod_d_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vaddwod_d_wu(v4u32 _1, v4u32 _2) {
+  return __builtin_lsx_vaddwod_d_wu(_1, _2);
+}
+// CHECK-LABEL: @vaddwod_w_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vaddwod_w_hu(v8u16 _1, v8u16 _2) {
+  return __builtin_lsx_vaddwod_w_hu(_1, _2);
+}
+// CHECK-LABEL: @vaddwod_h_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8i16 vaddwod_h_bu(v16u8 _1, v16u8 _2) {
+  return __builtin_lsx_vaddwod_h_bu(_1, _2);
+}
+// CHECK-LABEL: @vaddwev_d_wu_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vaddwev_d_wu_w(v4u32 _1, v4i32 _2) {
+  return __builtin_lsx_vaddwev_d_wu_w(_1, _2);
+}
+// CHECK-LABEL: @vaddwev_w_hu_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vaddwev_w_hu_h(v8u16 _1, v8i16 _2) {
+  return __builtin_lsx_vaddwev_w_hu_h(_1, _2);
+}
+// CHECK-LABEL: @vaddwev_h_bu_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8i16 vaddwev_h_bu_b(v16u8 _1, v16i8 _2) {
+  return __builtin_lsx_vaddwev_h_bu_b(_1, _2);
+}
+// CHECK-LABEL: @vaddwod_d_wu_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vaddwod_d_wu_w(v4u32 _1, v4i32 _2) {
+  return __builtin_lsx_vaddwod_d_wu_w(_1, _2);
+}
+// CHECK-LABEL: @vaddwod_w_hu_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vaddwod_w_hu_h(v8u16 _1, v8i16 _2) {
+  return __builtin_lsx_vaddwod_w_hu_h(_1, _2);
+}
+// CHECK-LABEL: @vaddwod_h_bu_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8i16 vaddwod_h_bu_b(v16u8 _1, v16i8 _2) {
+  return __builtin_lsx_vaddwod_h_bu_b(_1, _2);
+}
+// CHECK-LABEL: @vsubwev_d_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vsubwev_d_w(v4i32 _1, v4i32 _2) {
+  return __builtin_lsx_vsubwev_d_w(_1, _2);
+}
+// CHECK-LABEL: @vsubwev_w_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwev.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vsubwev_w_h(v8i16 _1, v8i16 _2) {
+  return __builtin_lsx_vsubwev_w_h(_1, _2);
+}
+// CHECK-LABEL: @vsubwev_h_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwev.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8i16 vsubwev_h_b(v16i8 _1, v16i8 _2) {
+  return __builtin_lsx_vsubwev_h_b(_1, _2);
+}
+// CHECK-LABEL: @vsubwod_d_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vsubwod_d_w(v4i32 _1, v4i32 _2) {
+  return __builtin_lsx_vsubwod_d_w(_1, _2);
+}
+// CHECK-LABEL: @vsubwod_w_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwod.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vsubwod_w_h(v8i16 _1, v8i16 _2) {
+  return __builtin_lsx_vsubwod_w_h(_1, _2);
+}
+// CHECK-LABEL: @vsubwod_h_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwod.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8i16 vsubwod_h_b(v16i8 _1, v16i8 _2) {
+  return __builtin_lsx_vsubwod_h_b(_1, _2);
+}
+// CHECK-LABEL: @vsubwev_d_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.d.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vsubwev_d_wu(v4u32 _1, v4u32 _2) {
+  return __builtin_lsx_vsubwev_d_wu(_1, _2);
+}
+// CHECK-LABEL: @vsubwev_w_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwev.w.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vsubwev_w_hu(v8u16 _1, v8u16 _2) {
+  return __builtin_lsx_vsubwev_w_hu(_1, _2);
+}
+// CHECK-LABEL: @vsubwev_h_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwev.h.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8i16 vsubwev_h_bu(v16u8 _1, v16u8 _2) {
+  return __builtin_lsx_vsubwev_h_bu(_1, _2);
+}
+// CHECK-LABEL: @vsubwod_d_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.d.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vsubwod_d_wu(v4u32 _1, v4u32 _2) {
+  return __builtin_lsx_vsubwod_d_wu(_1, _2);
+}
+// CHECK-LABEL: @vsubwod_w_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsubwod.w.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vsubwod_w_hu(v8u16 _1, v8u16 _2) {
+  return __builtin_lsx_vsubwod_w_hu(_1, _2);
+}
+// CHECK-LABEL: @vsubwod_h_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsubwod.h.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8i16 vsubwod_h_bu(v16u8 _1, v16u8 _2) {
+  return __builtin_lsx_vsubwod_h_bu(_1, _2);
+}
+// CHECK-LABEL: @vaddwev_q_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vaddwev_q_d(v2i64 _1, v2i64 _2) {
+  return __builtin_lsx_vaddwev_q_d(_1, _2);
+}
+// CHECK-LABEL: @vaddwod_q_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vaddwod_q_d(v2i64 _1, v2i64 _2) {
+  return __builtin_lsx_vaddwod_q_d(_1, _2);
+}
+// CHECK-LABEL: @vaddwev_q_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vaddwev_q_du(v2u64 _1, v2u64 _2) {
+  return __builtin_lsx_vaddwev_q_du(_1, _2);
+}
+// CHECK-LABEL: @vaddwod_q_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vaddwod_q_du(v2u64 _1, v2u64 _2) {
+  return __builtin_lsx_vaddwod_q_du(_1, _2);
+}
+// CHECK-LABEL: @vsubwev_q_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vsubwev_q_d(v2i64 _1, v2i64 _2) {
+  return __builtin_lsx_vsubwev_q_d(_1, _2);
+}
+// CHECK-LABEL: @vsubwod_q_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vsubwod_q_d(v2i64 _1, v2i64 _2) {
+  return __builtin_lsx_vsubwod_q_d(_1, _2);
+}
+// CHECK-LABEL: @vsubwev_q_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vsubwev_q_du(v2u64 _1, v2u64 _2) {
+  return __builtin_lsx_vsubwev_q_du(_1, _2);
+}
+// CHECK-LABEL: @vsubwod_q_du(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vsubwod_q_du(v2u64 _1, v2u64 _2) {
+  return __builtin_lsx_vsubwod_q_du(_1, _2);
+}
+// CHECK-LABEL: @vaddwev_q_du_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vaddwev_q_du_d(v2u64 _1, v2i64 _2) {
+  return __builtin_lsx_vaddwev_q_du_d(_1, _2);
+}
+// CHECK-LABEL: @vaddwod_q_du_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vaddwod_q_du_d(v2u64 _1, v2i64 _2) {
+  return __builtin_lsx_vaddwod_q_du_d(_1, _2);
+}
+// CHECK-LABEL: @vmulwev_d_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vmulwev_d_w(v4i32 _1, v4i32 _2) {
+  return __builtin_lsx_vmulwev_d_w(_1, _2);
+}
+// CHECK-LABEL: @vmulwev_w_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vmulwev_w_h(v8i16 _1, v8i16 _2) {
+  return __builtin_lsx_vmulwev_w_h(_1, _2);
+}
+// CHECK-LABEL: @vmulwev_h_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8i16 vmulwev_h_b(v16i8 _1, v16i8 _2) {
+  return __builtin_lsx_vmulwev_h_b(_1, _2);
+}
+// CHECK-LABEL: @vmulwod_d_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vmulwod_d_w(v4i32 _1, v4i32 _2) {
+  return __builtin_lsx_vmulwod_d_w(_1, _2);
+}
+// CHECK-LABEL: @vmulwod_w_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vmulwod_w_h(v8i16 _1, v8i16 _2) {
+  return __builtin_lsx_vmulwod_w_h(_1, _2);
+}
+// CHECK-LABEL: @vmulwod_h_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8i16 vmulwod_h_b(v16i8 _1, v16i8 _2) {
+  return __builtin_lsx_vmulwod_h_b(_1, _2);
+}
+// CHECK-LABEL: @vmulwev_d_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vmulwev_d_wu(v4u32 _1, v4u32 _2) {
+  return __builtin_lsx_vmulwev_d_wu(_1, _2);
+}
+// CHECK-LABEL: @vmulwev_w_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vmulwev_w_hu(v8u16 _1, v8u16 _2) {
+  return __builtin_lsx_vmulwev_w_hu(_1, _2);
+}
+// CHECK-LABEL: @vmulwev_h_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8i16 vmulwev_h_bu(v16u8 _1, v16u8 _2) {
+  return __builtin_lsx_vmulwev_h_bu(_1, _2);
+}
+// CHECK-LABEL: @vmulwod_d_wu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vmulwod_d_wu(v4u32 _1, v4u32 _2) {
+  return __builtin_lsx_vmulwod_d_wu(_1, _2);
+}
+// CHECK-LABEL: @vmulwod_w_hu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vmulwod_w_hu(v8u16 _1, v8u16 _2) {
+  return __builtin_lsx_vmulwod_w_hu(_1, _2);
+}
+// CHECK-LABEL: @vmulwod_h_bu(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8i16 vmulwod_h_bu(v16u8 _1, v16u8 _2) {
+  return __builtin_lsx_vmulwod_h_bu(_1, _2);
+}
+// CHECK-LABEL: @vmulwev_d_wu_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vmulwev_d_wu_w(v4u32 _1, v4i32 _2) {
+  return __builtin_lsx_vmulwev_d_wu_w(_1, _2);
+}
+// CHECK-LABEL: @vmulwev_w_hu_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vmulwev_w_hu_h(v8u16 _1, v8i16 _2) {
+  return __builtin_lsx_vmulwev_w_hu_h(_1, _2);
+}
+// CHECK-LABEL: @vmulwev_h_bu_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8i16 vmulwev_h_bu_b(v16u8 _1, v16i8 _2) {
+  return __builtin_lsx_vmulwev_h_bu_b(_1, _2);
+}
+// CHECK-LABEL: @vmulwod_d_wu_w(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vmulwod_d_wu_w(v4u32 _1, v4i32 _2) {
+  return __builtin_lsx_vmulwod_d_wu_w(_1, _2);
+}
+// CHECK-LABEL: @vmulwod_w_hu_h(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v4i32 vmulwod_w_hu_h(v8u16 _1, v8i16 _2) {
+  return __builtin_lsx_vmulwod_w_hu_h(_1, _2);
+}
+// CHECK-LABEL: @vmulwod_h_bu_b(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v8i16 vmulwod_h_bu_b(v16u8 _1, v16i8 _2) {
+  return __builtin_lsx_vmulwod_h_bu_b(_1, _2);
+}
+// CHECK-LABEL: @vmulwev_q_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+// CHECK-NEXT: ret i128 [[TMP3]]
+//
+v2i64 vmulwev_q_d(v2i64 _1, v2i64 _2) {
+  return __builtin_lsx_vmulwev_q_d(_1, _2);
+} +// CHECK-LABEL: @vmulwod_q_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vmulwod_q_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vmulwod_q_d(_1, _2); +} +// CHECK-LABEL: @vmulwev_q_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vmulwev_q_du(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vmulwev_q_du(_1, _2); +} +// CHECK-LABEL: @vmulwod_q_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vmulwod_q_du(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vmulwod_q_du(_1, _2); +} +// CHECK-LABEL: @vmulwev_q_du_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vmulwev_q_du_d(v2u64 _1, v2i64 _2) { + return __builtin_lsx_vmulwev_q_du_d(_1, _2); +} +// CHECK-LABEL: @vmulwod_q_du_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vmulwod_q_du_d(v2u64 _1, v2i64 _2) { + return __builtin_lsx_vmulwod_q_du_d(_1, _2); +} +// CHECK-LABEL: @vhaddw_q_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vhaddw_q_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vhaddw_q_d(_1, _2); +} +// CHECK-LABEL: @vhaddw_qu_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.qu.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// 
CHECK-NEXT: ret i128 [[TMP3]] +// +v2u64 vhaddw_qu_du(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vhaddw_qu_du(_1, _2); +} +// CHECK-LABEL: @vhsubw_q_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vhsubw_q_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vhsubw_q_d(_1, _2); +} +// CHECK-LABEL: @vhsubw_qu_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.qu.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2u64 vhsubw_qu_du(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vhsubw_qu_du(_1, _2); +} +// CHECK-LABEL: @vmaddwev_d_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.w(<2 x i64> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 +// CHECK-NEXT: ret i128 [[TMP4]] +// +v2i64 vmaddwev_d_w(v2i64 _1, v4i32 _2, v4i32 _3) { + return __builtin_lsx_vmaddwev_d_w(_1, _2, _3); +} +// CHECK-LABEL: @vmaddwev_w_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.h(<4 x i32> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 +// CHECK-NEXT: ret i128 [[TMP4]] +// +v4i32 vmaddwev_w_h(v4i32 _1, v8i16 _2, v8i16 _3) { + return __builtin_lsx_vmaddwev_w_h(_1, _2, _3); +} +// CHECK-LABEL: @vmaddwev_h_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.b(<8 x i16> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 +// CHECK-NEXT: ret i128 [[TMP4]] +// +v8i16 vmaddwev_h_b(v8i16 _1, v16i8 _2, v16i8 _3) { + return __builtin_lsx_vmaddwev_h_b(_1, _2, _3); +} +// CHECK-LABEL: @vmaddwev_d_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu(<2 x i64> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 +// 
CHECK-NEXT: ret i128 [[TMP4]] +// +v2u64 vmaddwev_d_wu(v2u64 _1, v4u32 _2, v4u32 _3) { + return __builtin_lsx_vmaddwev_d_wu(_1, _2, _3); +} +// CHECK-LABEL: @vmaddwev_w_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu(<4 x i32> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 +// CHECK-NEXT: ret i128 [[TMP4]] +// +v4u32 vmaddwev_w_hu(v4u32 _1, v8u16 _2, v8u16 _3) { + return __builtin_lsx_vmaddwev_w_hu(_1, _2, _3); +} +// CHECK-LABEL: @vmaddwev_h_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu(<8 x i16> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 +// CHECK-NEXT: ret i128 [[TMP4]] +// +v8u16 vmaddwev_h_bu(v8u16 _1, v16u8 _2, v16u8 _3) { + return __builtin_lsx_vmaddwev_h_bu(_1, _2, _3); +} +// CHECK-LABEL: @vmaddwod_d_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.w(<2 x i64> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 +// CHECK-NEXT: ret i128 [[TMP4]] +// +v2i64 vmaddwod_d_w(v2i64 _1, v4i32 _2, v4i32 _3) { + return __builtin_lsx_vmaddwod_d_w(_1, _2, _3); +} +// CHECK-LABEL: @vmaddwod_w_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.h(<4 x i32> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 +// CHECK-NEXT: ret i128 [[TMP4]] +// +v4i32 vmaddwod_w_h(v4i32 _1, v8i16 _2, v8i16 _3) { + return __builtin_lsx_vmaddwod_w_h(_1, _2, _3); +} +// CHECK-LABEL: @vmaddwod_h_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.b(<8 x i16> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 +// CHECK-NEXT: ret i128 [[TMP4]] +// +v8i16 vmaddwod_h_b(v8i16 _1, v16i8 _2, v16i8 _3) { + return __builtin_lsx_vmaddwod_h_b(_1, _2, _3); +} +// CHECK-LABEL: @vmaddwod_d_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast 
i128 [[_3_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu(<2 x i64> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 +// CHECK-NEXT: ret i128 [[TMP4]] +// +v2u64 vmaddwod_d_wu(v2u64 _1, v4u32 _2, v4u32 _3) { + return __builtin_lsx_vmaddwod_d_wu(_1, _2, _3); +} +// CHECK-LABEL: @vmaddwod_w_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu(<4 x i32> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 +// CHECK-NEXT: ret i128 [[TMP4]] +// +v4u32 vmaddwod_w_hu(v4u32 _1, v8u16 _2, v8u16 _3) { + return __builtin_lsx_vmaddwod_w_hu(_1, _2, _3); +} +// CHECK-LABEL: @vmaddwod_h_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu(<8 x i16> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 +// CHECK-NEXT: ret i128 [[TMP4]] +// +v8u16 vmaddwod_h_bu(v8u16 _1, v16u8 _2, v16u8 _3) { + return __builtin_lsx_vmaddwod_h_bu(_1, _2, _3); +} +// CHECK-LABEL: @vmaddwev_d_wu_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu.w(<2 x i64> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 +// CHECK-NEXT: ret i128 [[TMP4]] +// +v2i64 vmaddwev_d_wu_w(v2i64 _1, v4u32 _2, v4i32 _3) { + return __builtin_lsx_vmaddwev_d_wu_w(_1, _2, _3); +} +// CHECK-LABEL: @vmaddwev_w_hu_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu.h(<4 x i32> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 +// CHECK-NEXT: ret i128 [[TMP4]] +// +v4i32 vmaddwev_w_hu_h(v4i32 _1, v8u16 _2, v8i16 _3) { + return __builtin_lsx_vmaddwev_w_hu_h(_1, _2, _3); +} +// CHECK-LABEL: @vmaddwev_h_bu_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu.b(<8 x i16> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 +// CHECK-NEXT: ret i128 [[TMP4]] +// +v8i16 vmaddwev_h_bu_b(v8i16 _1, v16u8 _2, v16i8 _3) { + return 
__builtin_lsx_vmaddwev_h_bu_b(_1, _2, _3); +} +// CHECK-LABEL: @vmaddwod_d_wu_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu.w(<2 x i64> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 +// CHECK-NEXT: ret i128 [[TMP4]] +// +v2i64 vmaddwod_d_wu_w(v2i64 _1, v4u32 _2, v4i32 _3) { + return __builtin_lsx_vmaddwod_d_wu_w(_1, _2, _3); +} +// CHECK-LABEL: @vmaddwod_w_hu_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu.h(<4 x i32> [[TMP0]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to i128 +// CHECK-NEXT: ret i128 [[TMP4]] +// +v4i32 vmaddwod_w_hu_h(v4i32 _1, v8u16 _2, v8i16 _3) { + return __builtin_lsx_vmaddwod_w_hu_h(_1, _2, _3); +} +// CHECK-LABEL: @vmaddwod_h_bu_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP3:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu.b(<8 x i16> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to i128 +// CHECK-NEXT: ret i128 [[TMP4]] +// +v8i16 vmaddwod_h_bu_b(v8i16 _1, v16u8 _2, v16i8 _3) { + return __builtin_lsx_vmaddwod_h_bu_b(_1, _2, _3); +} +// CHECK-LABEL: @vmaddwev_q_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 +// CHECK-NEXT: ret i128 [[TMP4]] +// +v2i64 vmaddwev_q_d(v2i64 _1, v2i64 _2, v2i64 _3) { + return __builtin_lsx_vmaddwev_q_d(_1, _2, _3); +} +// CHECK-LABEL: @vmaddwod_q_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 +// CHECK-NEXT: ret i128 [[TMP4]] +// +v2i64 vmaddwod_q_d(v2i64 _1, v2i64 _2, v2i64 _3) { + return __builtin_lsx_vmaddwod_q_d(_1, _2, _3); +} +// CHECK-LABEL: @vmaddwev_q_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP3:%.*]] = 
tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 +// CHECK-NEXT: ret i128 [[TMP4]] +// +v2u64 vmaddwev_q_du(v2u64 _1, v2u64 _2, v2u64 _3) { + return __builtin_lsx_vmaddwev_q_du(_1, _2, _3); +} +// CHECK-LABEL: @vmaddwod_q_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 +// CHECK-NEXT: ret i128 [[TMP4]] +// +v2u64 vmaddwod_q_du(v2u64 _1, v2u64 _2, v2u64 _3) { + return __builtin_lsx_vmaddwod_q_du(_1, _2, _3); +} +// CHECK-LABEL: @vmaddwev_q_du_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 +// CHECK-NEXT: ret i128 [[TMP4]] +// +v2i64 vmaddwev_q_du_d(v2i64 _1, v2u64 _2, v2i64 _3) { + return __builtin_lsx_vmaddwev_q_du_d(_1, _2, _3); +} +// CHECK-LABEL: @vmaddwod_q_du_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to i128 +// CHECK-NEXT: ret i128 [[TMP4]] +// +v2i64 vmaddwod_q_du_d(v2i64 _1, v2u64 _2, v2i64 _3) { + return __builtin_lsx_vmaddwod_q_du_d(_1, _2, _3); +} +// CHECK-LABEL: @vrotr_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vrotr.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vrotr_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vrotr_b(_1, _2); +} +// CHECK-LABEL: @vrotr_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vrotr.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vrotr_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vrotr_h(_1, _2); +} +// CHECK-LABEL: @vrotr_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vrotr.w(<4 x i32> [[TMP0]], 
<4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vrotr_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vrotr_w(_1, _2); +} +// CHECK-LABEL: @vrotr_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vrotr.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vrotr_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vrotr_d(_1, _2); +} +// CHECK-LABEL: @vadd_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vadd.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vadd_q(v2i64 _1, v2i64 _2) { return __builtin_lsx_vadd_q(_1, _2); } +// CHECK-LABEL: @vsub_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsub.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vsub_q(v2i64 _1, v2i64 _2) { return __builtin_lsx_vsub_q(_1, _2); } +// CHECK-LABEL: @vldrepl_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vldrepl.b(ptr [[_1:%.*]], i32 1) +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to i128 +// CHECK-NEXT: ret i128 [[TMP1]] +// +v16i8 vldrepl_b(void *_1) { return __builtin_lsx_vldrepl_b(_1, 1); } +// CHECK-LABEL: @vldrepl_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vldrepl.h(ptr [[_1:%.*]], i32 2) +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to i128 +// CHECK-NEXT: ret i128 [[TMP1]] +// +v8i16 vldrepl_h(void *_1) { return __builtin_lsx_vldrepl_h(_1, 2); } +// CHECK-LABEL: @vldrepl_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vldrepl.w(ptr [[_1:%.*]], i32 4) +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to i128 +// CHECK-NEXT: ret i128 [[TMP1]] +// +v4i32 vldrepl_w(void *_1) { return __builtin_lsx_vldrepl_w(_1, 4); } +// CHECK-LABEL: @vldrepl_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vldrepl.d(ptr [[_1:%.*]], i32 8) +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to i128 +// CHECK-NEXT: ret i128 [[TMP1]] +// +v2i64 vldrepl_d(void *_1) { return __builtin_lsx_vldrepl_d(_1, 8); } +// CHECK-LABEL: @vmskgez_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmskgez.b(<16 x i8> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v16i8 vmskgez_b(v16i8 _1) { return __builtin_lsx_vmskgez_b(_1); } +// CHECK-LABEL: @vmsknz_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// 
CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vmsknz.b(<16 x i8> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v16i8 vmsknz_b(v16i8 _1) { return __builtin_lsx_vmsknz_b(_1); } +// CHECK-LABEL: @vexth_h_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vexth.h.b(<16 x i8> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v8i16 vexth_h_b(v16i8 _1) { return __builtin_lsx_vexth_h_b(_1); } +// CHECK-LABEL: @vexth_w_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vexth.w.h(<8 x i16> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4i32 vexth_w_h(v8i16 _1) { return __builtin_lsx_vexth_w_h(_1); } +// CHECK-LABEL: @vexth_d_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.d.w(<4 x i32> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2i64 vexth_d_w(v4i32 _1) { return __builtin_lsx_vexth_d_w(_1); } +// CHECK-LABEL: @vexth_q_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.q.d(<2 x i64> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2i64 vexth_q_d(v2i64 _1) { return __builtin_lsx_vexth_q_d(_1); } +// CHECK-LABEL: @vexth_hu_bu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vexth.hu.bu(<16 x i8> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v8u16 vexth_hu_bu(v16u8 _1) { return __builtin_lsx_vexth_hu_bu(_1); } +// CHECK-LABEL: @vexth_wu_hu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vexth.wu.hu(<8 x i16> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4u32 vexth_wu_hu(v8u16 _1) { return __builtin_lsx_vexth_wu_hu(_1); } +// CHECK-LABEL: @vexth_du_wu( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.du.wu(<4 x i32> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2u64 vexth_du_wu(v4u32 _1) { return __builtin_lsx_vexth_du_wu(_1); } +// CHECK-LABEL: @vexth_qu_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vexth.qu.du(<2 x i64> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2u64 vexth_qu_du(v2u64 _1) { return __builtin_lsx_vexth_qu_du(_1); } +// CHECK-LABEL: @vrotri_b( +// CHECK-NEXT: entry: +// 
CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vrotri.b(<16 x i8> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v16i8 vrotri_b(v16i8 _1) { return __builtin_lsx_vrotri_b(_1, 1); } +// CHECK-LABEL: @vrotri_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vrotri.h(<8 x i16> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v8i16 vrotri_h(v8i16 _1) { return __builtin_lsx_vrotri_h(_1, 1); } +// CHECK-LABEL: @vrotri_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vrotri.w(<4 x i32> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v4i32 vrotri_w(v4i32 _1) { return __builtin_lsx_vrotri_w(_1, 1); } +// CHECK-LABEL: @vrotri_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vrotri.d(<2 x i64> [[TMP0]], i32 1) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2i64 vrotri_d(v2i64 _1) { return __builtin_lsx_vrotri_d(_1, 1); } +// CHECK-LABEL: @vextl_q_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vextl.q.d(<2 x i64> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2i64 vextl_q_d(v2i64 _1) { return __builtin_lsx_vextl_q_d(_1); } +// CHECK-LABEL: @vsrlni_b_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlni.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vsrlni_b_h(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vsrlni_b_h(_1, _2, 1); +} +// CHECK-LABEL: @vsrlni_h_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlni.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vsrlni_h_w(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vsrlni_h_w(_1, _2, 1); +} +// CHECK-LABEL: @vsrlni_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlni.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vsrlni_w_d(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vsrlni_w_d(_1, _2, 1); +} +// 
CHECK-LABEL: @vsrlni_d_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlni.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vsrlni_d_q(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vsrlni_d_q(_1, _2, 1); +} +// CHECK-LABEL: @vsrlrni_b_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlrni.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vsrlrni_b_h(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vsrlrni_b_h(_1, _2, 1); +} +// CHECK-LABEL: @vsrlrni_h_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlrni.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vsrlrni_h_w(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vsrlrni_h_w(_1, _2, 1); +} +// CHECK-LABEL: @vsrlrni_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlrni.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vsrlrni_w_d(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vsrlrni_w_d(_1, _2, 1); +} +// CHECK-LABEL: @vsrlrni_d_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrlrni.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vsrlrni_d_q(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vsrlrni_d_q(_1, _2, 1); +} +// CHECK-LABEL: @vssrlni_b_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlni.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vssrlni_b_h(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vssrlni_b_h(_1, _2, 1); +} +// CHECK-LABEL: @vssrlni_h_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlni.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> 
[[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vssrlni_h_w(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vssrlni_h_w(_1, _2, 1); +} +// CHECK-LABEL: @vssrlni_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlni.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vssrlni_w_d(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vssrlni_w_d(_1, _2, 1); +} +// CHECK-LABEL: @vssrlni_d_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlni.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vssrlni_d_q(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vssrlni_d_q(_1, _2, 1); +} +// CHECK-LABEL: @vssrlni_bu_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlni.bu.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16u8 vssrlni_bu_h(v16u8 _1, v16i8 _2) { + return __builtin_lsx_vssrlni_bu_h(_1, _2, 1); +} +// CHECK-LABEL: @vssrlni_hu_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlni.hu.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8u16 vssrlni_hu_w(v8u16 _1, v8i16 _2) { + return __builtin_lsx_vssrlni_hu_w(_1, _2, 1); +} +// CHECK-LABEL: @vssrlni_wu_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlni.wu.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4u32 vssrlni_wu_d(v4u32 _1, v4i32 _2) { + return __builtin_lsx_vssrlni_wu_d(_1, _2, 1); +} +// CHECK-LABEL: @vssrlni_du_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlni.du.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2u64 vssrlni_du_q(v2u64 _1, v2i64 _2) { + return __builtin_lsx_vssrlni_du_q(_1, _2, 1); +} +// CHECK-LABEL: @vssrlrni_b_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// 
CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrni.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vssrlrni_b_h(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vssrlrni_b_h(_1, _2, 1); +} +// CHECK-LABEL: @vssrlrni_h_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrni.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vssrlrni_h_w(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vssrlrni_h_w(_1, _2, 1); +} +// CHECK-LABEL: @vssrlrni_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrni.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vssrlrni_w_d(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vssrlrni_w_d(_1, _2, 1); +} +// CHECK-LABEL: @vssrlrni_d_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlrni.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vssrlrni_d_q(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vssrlrni_d_q(_1, _2, 1); +} +// CHECK-LABEL: @vssrlrni_bu_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrni.bu.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16u8 vssrlrni_bu_h(v16u8 _1, v16i8 _2) { + return __builtin_lsx_vssrlrni_bu_h(_1, _2, 1); +} +// CHECK-LABEL: @vssrlrni_hu_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrni.hu.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8u16 vssrlrni_hu_w(v8u16 _1, v8i16 _2) { + return __builtin_lsx_vssrlrni_hu_w(_1, _2, 1); +} +// CHECK-LABEL: @vssrlrni_wu_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrni.wu.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4u32 vssrlrni_wu_d(v4u32 _1, v4i32 _2) { + return __builtin_lsx_vssrlrni_wu_d(_1, _2, 1); +} +// CHECK-LABEL: 
@vssrlrni_du_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrlrni.du.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2u64 vssrlrni_du_q(v2u64 _1, v2i64 _2) { + return __builtin_lsx_vssrlrni_du_q(_1, _2, 1); +} +// CHECK-LABEL: @vsrani_b_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrani.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vsrani_b_h(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vsrani_b_h(_1, _2, 1); +} +// CHECK-LABEL: @vsrani_h_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrani.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vsrani_h_w(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vsrani_h_w(_1, _2, 1); +} +// CHECK-LABEL: @vsrani_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrani.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vsrani_w_d(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vsrani_w_d(_1, _2, 1); +} +// CHECK-LABEL: @vsrani_d_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrani.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vsrani_d_q(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vsrani_d_q(_1, _2, 1); +} +// CHECK-LABEL: @vsrarni_b_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrarni.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vsrarni_b_h(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vsrarni_b_h(_1, _2, 1); +} +// CHECK-LABEL: @vsrarni_h_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrarni.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// 
CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vsrarni_h_w(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vsrarni_h_w(_1, _2, 1); +} +// CHECK-LABEL: @vsrarni_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrarni.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vsrarni_w_d(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vsrarni_w_d(_1, _2, 1); +} +// CHECK-LABEL: @vsrarni_d_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vsrarni.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vsrarni_d_q(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vsrarni_d_q(_1, _2, 1); +} +// CHECK-LABEL: @vssrani_b_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrani.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vssrani_b_h(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vssrani_b_h(_1, _2, 1); +} +// CHECK-LABEL: @vssrani_h_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrani.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vssrani_h_w(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vssrani_h_w(_1, _2, 1); +} +// CHECK-LABEL: @vssrani_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrani.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vssrani_w_d(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vssrani_w_d(_1, _2, 1); +} +// CHECK-LABEL: @vssrani_d_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrani.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vssrani_d_q(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vssrani_d_q(_1, _2, 1); +} +// CHECK-LABEL: @vssrani_bu_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> 
@llvm.loongarch.lsx.vssrani.bu.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16u8 vssrani_bu_h(v16u8 _1, v16i8 _2) { + return __builtin_lsx_vssrani_bu_h(_1, _2, 1); +} +// CHECK-LABEL: @vssrani_hu_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrani.hu.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8u16 vssrani_hu_w(v8u16 _1, v8i16 _2) { + return __builtin_lsx_vssrani_hu_w(_1, _2, 1); +} +// CHECK-LABEL: @vssrani_wu_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrani.wu.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4u32 vssrani_wu_d(v4u32 _1, v4i32 _2) { + return __builtin_lsx_vssrani_wu_d(_1, _2, 1); +} +// CHECK-LABEL: @vssrani_du_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrani.du.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2u64 vssrani_du_q(v2u64 _1, v2i64 _2) { + return __builtin_lsx_vssrani_du_q(_1, _2, 1); +} +// CHECK-LABEL: @vssrarni_b_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarni.b.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vssrarni_b_h(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vssrarni_b_h(_1, _2, 1); +} +// CHECK-LABEL: @vssrarni_h_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarni.h.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vssrarni_h_w(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vssrarni_h_w(_1, _2, 1); +} +// CHECK-LABEL: @vssrarni_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarni.w.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vssrarni_w_d(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vssrarni_w_d(_1, _2, 1); +} +// CHECK-LABEL: @vssrarni_d_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: 
[[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrarni.d.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vssrarni_d_q(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vssrarni_d_q(_1, _2, 1); +} +// CHECK-LABEL: @vssrarni_bu_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrarni.bu.h(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16u8 vssrarni_bu_h(v16u8 _1, v16i8 _2) { + return __builtin_lsx_vssrarni_bu_h(_1, _2, 1); +} +// CHECK-LABEL: @vssrarni_hu_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrarni.hu.w(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]], i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8u16 vssrarni_hu_w(v8u16 _1, v8i16 _2) { + return __builtin_lsx_vssrarni_hu_w(_1, _2, 1); +} +// CHECK-LABEL: @vssrarni_wu_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarni.wu.d(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4u32 vssrarni_wu_d(v4u32 _1, v4i32 _2) { + return __builtin_lsx_vssrarni_wu_d(_1, _2, 1); +} +// CHECK-LABEL: @vssrarni_du_q( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vssrarni.du.q(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]], i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2u64 vssrarni_du_q(v2u64 _1, v2i64 _2) { + return __builtin_lsx_vssrarni_du_q(_1, _2, 1); +} +// CHECK-LABEL: @vpermi_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vpermi.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]], i32 1) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vpermi_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vpermi_w(_1, _2, 1); +} +// CHECK-LABEL: @vld( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vld(ptr [[_1:%.*]], i32 1) +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to i128 +// CHECK-NEXT: ret i128 [[TMP1]] +// +v16i8 vld(void *_1) { return __builtin_lsx_vld(_1, 1); } +// CHECK-LABEL: @vst( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> 
+// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vst(<16 x i8> [[TMP0]], ptr [[_2:%.*]], i32 1) +// CHECK-NEXT: ret void +// +void vst(v16i8 _1, void *_2) { return __builtin_lsx_vst(_1, _2, 1); } +// CHECK-LABEL: @vssrlrn_b_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrn.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vssrlrn_b_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vssrlrn_b_h(_1, _2); +} +// CHECK-LABEL: @vssrlrn_h_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrn.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vssrlrn_h_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vssrlrn_h_w(_1, _2); +} +// CHECK-LABEL: @vssrlrn_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrn.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vssrlrn_w_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vssrlrn_w_d(_1, _2); +} +// CHECK-LABEL: @vssrln_b_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrln.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vssrln_b_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vssrln_b_h(_1, _2); +} +// CHECK-LABEL: @vssrln_h_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrln.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v8i16 vssrln_h_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vssrln_h_w(_1, _2); +} +// CHECK-LABEL: @vssrln_w_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrln.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vssrln_w_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vssrln_w_d(_1, _2); +} +// CHECK-LABEL: @vorn_v( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> 
@llvm.loongarch.lsx.vorn.v(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v16i8 vorn_v(v16i8 _1, v16i8 _2) { return __builtin_lsx_vorn_v(_1, _2); } +// CHECK-LABEL: @vldi( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vldi(i32 1) +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to i128 +// CHECK-NEXT: ret i128 [[TMP1]] +// +v2i64 vldi() { return __builtin_lsx_vldi(1); } +// CHECK-LABEL: @vshuf_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i128 [[_3_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP3:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vshuf.b(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to i128 +// CHECK-NEXT: ret i128 [[TMP4]] +// +v16i8 vshuf_b(v16i8 _1, v16i8 _2, v16i8 _3) { + return __builtin_lsx_vshuf_b(_1, _2, _3); +} +// CHECK-LABEL: @vldx( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vldx(ptr [[_1:%.*]], i64 1) +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to i128 +// CHECK-NEXT: ret i128 [[TMP1]] +// +v16i8 vldx(void *_1) { return __builtin_lsx_vldx(_1, 1); } +// CHECK-LABEL: @vstx( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstx(<16 x i8> [[TMP0]], ptr [[_2:%.*]], i64 1) +// CHECK-NEXT: ret void +// +void vstx(v16i8 _1, void *_2) { return __builtin_lsx_vstx(_1, _2, 1); } +// CHECK-LABEL: @vextl_qu_du( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vextl.qu.du(<2 x i64> [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128 +// CHECK-NEXT: ret i128 [[TMP2]] +// +v2u64 vextl_qu_du(v2u64 _1) { return __builtin_lsx_vextl_qu_du(_1); } +// CHECK-LABEL: @bnz_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.b(<16 x i8> [[TMP0]]) +// CHECK-NEXT: ret i32 [[TMP1]] +// +int bnz_b(v16u8 _1) { return __builtin_lsx_bnz_b(_1); } +// CHECK-LABEL: @bnz_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.d(<2 x i64> [[TMP0]]) +// CHECK-NEXT: ret i32 [[TMP1]] +// +int bnz_d(v2u64 _1) { return __builtin_lsx_bnz_d(_1); } +// CHECK-LABEL: @bnz_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.h(<8 x i16> [[TMP0]]) +// CHECK-NEXT: ret i32 [[TMP1]] +// +int bnz_h(v8u16 _1) { return __builtin_lsx_bnz_h(_1); } +// CHECK-LABEL: @bnz_v( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.v(<16 x i8> [[TMP0]]) +// CHECK-NEXT: ret i32 [[TMP1]] +// +int bnz_v(v16u8 _1) { return __builtin_lsx_bnz_v(_1); } +// CHECK-LABEL: @bnz_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: 
[[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bnz.w(<4 x i32> [[TMP0]]) +// CHECK-NEXT: ret i32 [[TMP1]] +// +int bnz_w(v4u32 _1) { return __builtin_lsx_bnz_w(_1); } +// CHECK-LABEL: @bz_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.b(<16 x i8> [[TMP0]]) +// CHECK-NEXT: ret i32 [[TMP1]] +// +int bz_b(v16u8 _1) { return __builtin_lsx_bz_b(_1); } +// CHECK-LABEL: @bz_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64> +// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.d(<2 x i64> [[TMP0]]) +// CHECK-NEXT: ret i32 [[TMP1]] +// +int bz_d(v2u64 _1) { return __builtin_lsx_bz_d(_1); } +// CHECK-LABEL: @bz_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.h(<8 x i16> [[TMP0]]) +// CHECK-NEXT: ret i32 [[TMP1]] +// +int bz_h(v8u16 _1) { return __builtin_lsx_bz_h(_1); } +// CHECK-LABEL: @bz_v( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.v(<16 x i8> [[TMP0]]) +// CHECK-NEXT: ret i32 [[TMP1]] +// +int bz_v(v16u8 _1) { return __builtin_lsx_bz_v(_1); } +// CHECK-LABEL: @bz_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32> +// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.loongarch.lsx.bz.w(<4 x i32> [[TMP0]]) +// CHECK-NEXT: ret i32 [[TMP1]] +// +int bz_w(v4u32 _1) { return __builtin_lsx_bz_w(_1); } +// CHECK-LABEL: @vfcmp_caf_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.caf.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vfcmp_caf_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_caf_d(_1, _2); +} +// CHECK-LABEL: @vfcmp_caf_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.caf.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vfcmp_caf_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_caf_s(_1, _2); +} +// CHECK-LABEL: @vfcmp_ceq_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.ceq.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vfcmp_ceq_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_ceq_d(_1, _2); +} +// CHECK-LABEL: @vfcmp_ceq_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> 
@llvm.loongarch.lsx.vfcmp.ceq.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vfcmp_ceq_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_ceq_s(_1, _2); +} +// CHECK-LABEL: @vfcmp_cle_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cle.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vfcmp_cle_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_cle_d(_1, _2); +} +// CHECK-LABEL: @vfcmp_cle_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cle.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vfcmp_cle_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_cle_s(_1, _2); +} +// CHECK-LABEL: @vfcmp_clt_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.clt.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vfcmp_clt_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_clt_d(_1, _2); +} +// CHECK-LABEL: @vfcmp_clt_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.clt.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vfcmp_clt_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_clt_s(_1, _2); +} +// CHECK-LABEL: @vfcmp_cne_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cne.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vfcmp_cne_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_cne_d(_1, _2); +} +// CHECK-LABEL: @vfcmp_cne_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cne.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vfcmp_cne_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_cne_s(_1, _2); +} +// CHECK-LABEL: @vfcmp_cor_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 
[[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cor.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vfcmp_cor_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_cor_d(_1, _2); +} +// CHECK-LABEL: @vfcmp_cor_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cor.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vfcmp_cor_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_cor_s(_1, _2); +} +// CHECK-LABEL: @vfcmp_cueq_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cueq.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vfcmp_cueq_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_cueq_d(_1, _2); +} +// CHECK-LABEL: @vfcmp_cueq_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cueq.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vfcmp_cueq_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_cueq_s(_1, _2); +} +// CHECK-LABEL: @vfcmp_cule_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cule.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vfcmp_cule_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_cule_d(_1, _2); +} +// CHECK-LABEL: @vfcmp_cule_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cule.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vfcmp_cule_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_cule_s(_1, _2); +} +// CHECK-LABEL: @vfcmp_cult_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cult.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 
vfcmp_cult_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_cult_d(_1, _2); +} +// CHECK-LABEL: @vfcmp_cult_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cult.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vfcmp_cult_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_cult_s(_1, _2); +} +// CHECK-LABEL: @vfcmp_cun_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cun.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vfcmp_cun_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_cun_d(_1, _2); +} +// CHECK-LABEL: @vfcmp_cune_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cune.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vfcmp_cune_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_cune_d(_1, _2); +} +// CHECK-LABEL: @vfcmp_cune_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cune.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vfcmp_cune_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_cune_s(_1, _2); +} +// CHECK-LABEL: @vfcmp_cun_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cun.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vfcmp_cun_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_cun_s(_1, _2); +} +// CHECK-LABEL: @vfcmp_saf_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.saf.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vfcmp_saf_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_saf_d(_1, _2); +} +// CHECK-LABEL: @vfcmp_saf_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> 
@llvm.loongarch.lsx.vfcmp.saf.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vfcmp_saf_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_saf_s(_1, _2); +} +// CHECK-LABEL: @vfcmp_seq_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.seq.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vfcmp_seq_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_seq_d(_1, _2); +} +// CHECK-LABEL: @vfcmp_seq_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.seq.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vfcmp_seq_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_seq_s(_1, _2); +} +// CHECK-LABEL: @vfcmp_sle_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sle.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vfcmp_sle_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_sle_d(_1, _2); +} +// CHECK-LABEL: @vfcmp_sle_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sle.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vfcmp_sle_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_sle_s(_1, _2); +} +// CHECK-LABEL: @vfcmp_slt_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.slt.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vfcmp_slt_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_slt_d(_1, _2); +} +// CHECK-LABEL: @vfcmp_slt_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.slt.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vfcmp_slt_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_slt_s(_1, _2); +} +// CHECK-LABEL: @vfcmp_sne_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 
[[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sne.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vfcmp_sne_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_sne_d(_1, _2); +} +// CHECK-LABEL: @vfcmp_sne_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sne.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vfcmp_sne_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_sne_s(_1, _2); +} +// CHECK-LABEL: @vfcmp_sor_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sor.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vfcmp_sor_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_sor_d(_1, _2); +} +// CHECK-LABEL: @vfcmp_sor_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sor.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vfcmp_sor_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_sor_s(_1, _2); +} +// CHECK-LABEL: @vfcmp_sueq_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sueq.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vfcmp_sueq_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_sueq_d(_1, _2); +} +// CHECK-LABEL: @vfcmp_sueq_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sueq.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vfcmp_sueq_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_sueq_s(_1, _2); +} +// CHECK-LABEL: @vfcmp_sule_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sule.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vfcmp_sule_d(v2f64 
_1, v2f64 _2) { + return __builtin_lsx_vfcmp_sule_d(_1, _2); +} +// CHECK-LABEL: @vfcmp_sule_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sule.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vfcmp_sule_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_sule_s(_1, _2); +} +// CHECK-LABEL: @vfcmp_sult_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sult.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vfcmp_sult_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_sult_d(_1, _2); +} +// CHECK-LABEL: @vfcmp_sult_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sult.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vfcmp_sult_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_sult_s(_1, _2); +} +// CHECK-LABEL: @vfcmp_sun_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sun.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vfcmp_sun_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_sun_d(_1, _2); +} +// CHECK-LABEL: @vfcmp_sune_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sune.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v2i64 vfcmp_sune_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_sune_d(_1, _2); +} +// CHECK-LABEL: @vfcmp_sune_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sune.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vfcmp_sune_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_sune_s(_1, _2); +} +// CHECK-LABEL: @vfcmp_sun_s( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float> +// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sun.s(<4 x 
float> [[TMP0]], <4 x float> [[TMP1]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128 +// CHECK-NEXT: ret i128 [[TMP3]] +// +v4i32 vfcmp_sun_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_sun_s(_1, _2); +} +// CHECK-LABEL: @vrepli_b( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vrepli.b(i32 1) +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to i128 +// CHECK-NEXT: ret i128 [[TMP1]] +// +v16i8 vrepli_b() { return __builtin_lsx_vrepli_b(1); } +// CHECK-LABEL: @vrepli_d( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.loongarch.lsx.vrepli.d(i32 1) +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to i128 +// CHECK-NEXT: ret i128 [[TMP1]] +// +v2i64 vrepli_d() { return __builtin_lsx_vrepli_d(1); } +// CHECK-LABEL: @vrepli_h( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vrepli.h(i32 1) +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to i128 +// CHECK-NEXT: ret i128 [[TMP1]] +// +v8i16 vrepli_h() { return __builtin_lsx_vrepli_h(1); } +// CHECK-LABEL: @vrepli_w( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vrepli.w(i32 1) +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to i128 +// CHECK-NEXT: ret i128 [[TMP1]] +// +v4i32 vrepli_w() { return __builtin_lsx_vrepli_w(1); } diff --git a/clang/test/CodeGen/LoongArch/lsx/inline-asm-gcc-regs-error.c b/clang/test/CodeGen/LoongArch/lsx/inline-asm-gcc-regs-error.c new file mode 100644 index 0000000000000000000000000000000000000000..54132307e93f48563226e18503c3e33054c2b88c --- /dev/null +++ b/clang/test/CodeGen/LoongArch/lsx/inline-asm-gcc-regs-error.c @@ -0,0 +1,10 @@ +// RUN: not %clang_cc1 -triple loongarch64 -emit-llvm -O2 %s 2>&1 -o - | FileCheck %s + +typedef signed char v16i8 __attribute__((vector_size(16), aligned(16))); + +void test() { +// CHECK: :[[#@LINE+1]]:28: error: unknown register name 'vr0' in asm + register v16i8 p0 asm ("vr0"); +// CHECK: :[[#@LINE+1]]:29: error: unknown register name '$vr32' in asm + register v16i8 p32 asm ("$vr32"); +} diff --git a/clang/test/CodeGen/LoongArch/lsx/inline-asm-gcc-regs.c b/clang/test/CodeGen/LoongArch/lsx/inline-asm-gcc-regs.c new file mode 100644 index 0000000000000000000000000000000000000000..b05b1c8c15fae2bda0c1d78a2a86125f69a2319a --- /dev/null +++ b/clang/test/CodeGen/LoongArch/lsx/inline-asm-gcc-regs.c @@ -0,0 +1,36 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --filter "^define |tail call" +// RUN: %clang_cc1 -triple loongarch64 -emit-llvm -O2 %s -o - | FileCheck %s + +typedef signed char v16i8 __attribute__((vector_size(16), aligned(16))); + +// CHECK-LABEL: @test_vr0( +// CHECK: tail call void asm sideeffect "", "{$vr0}"(<16 x i8> undef) #[[ATTR1:[0-9]+]], !srcloc !2 +// +void test_vr0() { + register v16i8 a asm ("$vr0"); + asm ("" :: "f"(a)); +} + +// CHECK-LABEL: @test_vr7( +// CHECK: tail call void asm sideeffect "", "{$vr7}"(<16 x i8> undef) #[[ATTR1]], !srcloc !3 +// +void test_vr7() { + register v16i8 a asm ("$vr7"); + asm ("" :: "f"(a)); +} + +// CHECK-LABEL: @test_vr15( +// CHECK: tail call void asm sideeffect "", "{$vr15}"(<16 x i8> undef) #[[ATTR1]], !srcloc !4 +// +void test_vr15() { + register v16i8 a asm ("$vr15"); + asm ("" :: "f"(a)); +} + +// CHECK-LABEL: @test_vr31( +// CHECK: tail call void asm sideeffect "", "{$vr31}"(<16 x i8> undef) #[[ATTR1]], !srcloc !5 +// +void test_vr31() { + register 
v16i8 a asm ("$vr31"); + asm ("" :: "f"(a)); +} diff --git a/clang/test/CodeGen/LoongArch/lsx/inline-asm-operand-modifier.c b/clang/test/CodeGen/LoongArch/lsx/inline-asm-operand-modifier.c new file mode 100644 index 0000000000000000000000000000000000000000..5e0fae984134ef7a6976d5200e85b56f122a32b2 --- /dev/null +++ b/clang/test/CodeGen/LoongArch/lsx/inline-asm-operand-modifier.c @@ -0,0 +1,15 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2 +// RUN: %clang_cc1 -triple loongarch64 -emit-llvm -O2 %s -o - | FileCheck %s + +typedef long long v2i64 __attribute__ ((vector_size(16), aligned(16))); + +// CHECK-LABEL: define dso_local void @test_w +// CHECK-SAME: () local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> asm sideeffect "vldi ${0:w}, 1", "=f"() #[[ATTR1:[0-9]+]], !srcloc !2 +// CHECK-NEXT: ret void +// +void test_w() { + v2i64 v2i64_r; + asm volatile ("vldi %w0, 1" : "=f" (v2i64_r)); +} diff --git a/clang/test/CodeGen/mcount.c b/clang/test/CodeGen/mcount.c index 8f994ab4e75443a64d2c2336f9fbcaf4750916b1..bdd609c1dfc58262b2e38ccaf6b032cf199636fd 100644 --- a/clang/test/CodeGen/mcount.c +++ b/clang/test/CodeGen/mcount.c @@ -7,6 +7,8 @@ // RUN: %clang_cc1 -pg -triple x86_64-netbsd -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK-DOUBLE-PREFIXED,NO-MCOUNT1 %s // RUN: %clang_cc1 -pg -triple arm-netbsd-eabi -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK-DOUBLE-PREFIXED,NO-MCOUNT1 %s // RUN: %clang_cc1 -pg -triple aarch64-netbsd -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK-DOUBLE-PREFIXED,NO-MCOUNT1 %s +// RUN: %clang_cc1 -pg -triple loongarch32 -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK-PREFIXED,NO-MCOUNT1 %s +// RUN: %clang_cc1 -pg -triple loongarch64 -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK-PREFIXED,NO-MCOUNT1 %s // RUN: %clang_cc1 -pg -triple mips-netbsd -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK-DOUBLE-PREFIXED,NO-MCOUNT1 %s // RUN: %clang_cc1 -pg -triple mips-unknown-gnu-linux -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK-PREFIXED,NO-MCOUNT1 %s // RUN: %clang_cc1 -pg -triple mipsel-unknown-gnu-linux -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK-PREFIXED,NO-MCOUNT1 %s diff --git a/clang/test/Driver/fdirect-access-external-data.c b/clang/test/Driver/fdirect-access-external-data.c index f132b1b088af35dc4e11657ed2f4c3fff41989b9..a6da776e69777421d4b221f092cc7f26fecc7a39 100644 --- a/clang/test/Driver/fdirect-access-external-data.c +++ b/clang/test/Driver/fdirect-access-external-data.c @@ -9,6 +9,12 @@ // RUN: %clang -### -c -target aarch64 %s -fpic 2>&1 | FileCheck %s --check-prefix=DEFAULT // RUN: %clang -### -c -target aarch64 %s -fpic -fdirect-access-external-data 2>&1 | FileCheck %s --check-prefix=DIRECT +/// loongarch* targets default to -fno-direct-access-external-data even for -fno-pic. 
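+/// (Illustrative note: with -fno-direct-access-external-data, a reference such
+/// as "extern int x; int f(void) { return x; }" is compiled to go through the
+/// GOT rather than using a direct PC-relative access to x.)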
+// RUN: %clang -### -c --target=loongarch64 -fno-pic %s 2>&1 | FileCheck %s --check-prefix=INDIRECT +// RUN: %clang -### -c --target=loongarch64 -fpie %s 2>&1 | FileCheck %s --check-prefix=DEFAULT +// RUN: %clang -### -c --target=loongarch32 -fno-pic -fdirect-access-external-data %s 2>&1 | FileCheck %s --check-prefix=DEFAULT +// RUN: %clang -### -c --target=loongarch32 -fpie -fdirect-access-external-data %s 2>&1 | FileCheck %s --check-prefix=DIRECT + // DEFAULT-NOT: direct-access-external-data" // DIRECT: "-fdirect-access-external-data" // INDIRECT: "-fno-direct-access-external-data" diff --git a/clang/test/Driver/loongarch-mlasx-error.c b/clang/test/Driver/loongarch-mlasx-error.c new file mode 100644 index 0000000000000000000000000000000000000000..e66f277f7c292f1e64c9e85a0701cfe9d2056974 --- /dev/null +++ b/clang/test/Driver/loongarch-mlasx-error.c @@ -0,0 +1,15 @@ +// RUN: not %clang --target=loongarch64 %s -fsyntax-only -mlasx -msingle-float 2>&1 | \ +// RUN: FileCheck --check-prefix=ERROR_LASX_FPU64 %s +// RUN: not %clang --target=loongarch64 %s -fsyntax-only -mlasx -msoft-float 2>&1 | \ +// RUN: FileCheck --check-prefix=ERROR_LASX_FPU64 %s +// RUN: not %clang --target=loongarch64 %s -fsyntax-only -mlasx -mfpu=32 2>&1 | \ +// RUN: FileCheck --check-prefix=ERROR_LASX_FPU64 %s +// RUN: not %clang --target=loongarch64 %s -fsyntax-only -mlasx -mfpu=0 2>&1 | \ +// RUN: FileCheck --check-prefix=ERROR_LASX_FPU64 %s +// RUN: not %clang --target=loongarch64 %s -fsyntax-only -mlasx -mfpu=none 2>&1 | \ +// RUN: FileCheck --check-prefix=ERROR_LASX_FPU64 %s +// RUN: not %clang --target=loongarch64 %s -fsyntax-only -mlasx -mno-lsx 2>&1 | \ +// RUN: FileCheck --check-prefix=ERROR_LASX_FPU128 %s + +// ERROR_LASX_FPU64: error: wrong fpu width; LASX depends on 64-bit FPU. +// ERROR_LASX_FPU128: error: invalid option combination; LASX depends on LSX. diff --git a/clang/test/Driver/loongarch-mlasx.c b/clang/test/Driver/loongarch-mlasx.c new file mode 100644 index 0000000000000000000000000000000000000000..0b934f125c9e462b5d7176367deb7ee90acd8b87 --- /dev/null +++ b/clang/test/Driver/loongarch-mlasx.c @@ -0,0 +1,37 @@ +/// Test -m[no-]lasx options.
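+///
+/// Illustrative note (not part of the checked output): code guarded on the
+/// macro that +lasx implies typically reads:
+///
+///   #if defined(__loongarch_asx)
+///   /* 256-bit LASX code path */
+///   #endif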
+ +// RUN: %clang --target=loongarch64 -mlasx -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CC1-LASX +// RUN: %clang --target=loongarch64 -mno-lasx -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CC1-NOLASX +// RUN: %clang --target=loongarch64 -mlasx -mno-lasx -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CC1-NOLASX +// RUN: %clang --target=loongarch64 -mno-lasx -mlasx -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CC1-LASX +// RUN: %clang --target=loongarch64 -mlsx -mlasx -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CC1-LASX +// RUN: %clang --target=loongarch64 -mlasx -mlsx -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CC1-LASX + +// RUN: %clang --target=loongarch64 -mlasx -S -emit-llvm %s -o - | \ +// RUN: FileCheck %s --check-prefix=IR-LASX +// RUN: %clang --target=loongarch64 -mno-lasx -S -emit-llvm %s -o - | \ +// RUN: FileCheck %s --check-prefix=IR-NOLASX +// RUN: %clang --target=loongarch64 -mlasx -mno-lasx -S -emit-llvm %s -o - | \ +// RUN: FileCheck %s --check-prefix=IR-NOLASX +// RUN: %clang --target=loongarch64 -mno-lasx -mlasx -S -emit-llvm %s -o - | \ +// RUN: FileCheck %s --check-prefix=IR-LASX +// RUN: %clang --target=loongarch64 -mlsx -mlasx -S -emit-llvm %s -o - | \ +// RUN: FileCheck %s --check-prefix=IR-LASX +// RUN: %clang --target=loongarch64 -mlasx -mlsx -S -emit-llvm %s -o - | \ +// RUN: FileCheck %s --check-prefix=IR-LASX + +// CC1-LASX: "-target-feature" "+lsx" "-target-feature" "+lasx" +// CC1-NOLASX: "-target-feature" "-lasx" + +// IR-LASX: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}+lasx{{(,.*)?}}" +// IR-NOLASX: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}-lasx{{(,.*)?}}" + +int foo(void){ + return 3; +} diff --git a/clang/test/Driver/loongarch-mlsx-error.c b/clang/test/Driver/loongarch-mlsx-error.c new file mode 100644 index 0000000000000000000000000000000000000000..bd6b8e2718bf6086d756f673aa5b347dac3ffeba --- /dev/null +++ b/clang/test/Driver/loongarch-mlsx-error.c @@ -0,0 +1,12 @@ +// RUN: not %clang --target=loongarch64 %s -fsyntax-only -mlsx -msingle-float 2>&1 | \ +// RUN: FileCheck --check-prefix=ERROR_LSX_FPU64 %s +// RUN: not %clang --target=loongarch64 %s -fsyntax-only -mlsx -msoft-float 2>&1 | \ +// RUN: FileCheck --check-prefix=ERROR_LSX_FPU64 %s +// RUN: not %clang --target=loongarch64 %s -fsyntax-only -mlsx -mfpu=32 2>&1 | \ +// RUN: FileCheck --check-prefix=ERROR_LSX_FPU64 %s +// RUN: not %clang --target=loongarch64 %s -fsyntax-only -mlsx -mfpu=0 2>&1 | \ +// RUN: FileCheck --check-prefix=ERROR_LSX_FPU64 %s +// RUN: not %clang --target=loongarch64 %s -fsyntax-only -mlsx -mfpu=none 2>&1 | \ +// RUN: FileCheck --check-prefix=ERROR_LSX_FPU64 %s + +// ERROR_LSX_FPU64: error: wrong fpu width; LSX depends on 64-bit FPU. diff --git a/clang/test/Driver/loongarch-mlsx.c b/clang/test/Driver/loongarch-mlsx.c new file mode 100644 index 0000000000000000000000000000000000000000..7d4307b078e1a06fb6a453ff685f6d598dbce970 --- /dev/null +++ b/clang/test/Driver/loongarch-mlsx.c @@ -0,0 +1,41 @@ +/// Test -m[no-]lsx options.
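+///
+/// Illustrative note (not part of the checked output): +lsx implies the
+/// __loongarch_sx macro and a 128-bit __loongarch_simd_width, so guarded
+/// code typically reads:
+///
+///   #if defined(__loongarch_sx)
+///   /* 128-bit LSX code path */
+///   #endif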
+ +// RUN: %clang --target=loongarch64 -mlsx -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CC1-LSX +// RUN: %clang --target=loongarch64 -mno-lsx -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CC1-NOLSX +// RUN: %clang --target=loongarch64 -mlsx -mno-lsx -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CC1-NOLSX +// RUN: %clang --target=loongarch64 -mno-lsx -mlsx -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CC1-LSX +// RUN: %clang --target=loongarch64 -mlsx -mno-lasx -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CC1-LSX +// RUN: %clang --target=loongarch64 -mno-lasx -mlsx -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CC1-LSX +// RUN: %clang --target=loongarch64 -mno-lsx -mno-lasx -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CC1-NOLSX + +// RUN: %clang --target=loongarch64 -mlsx -S -emit-llvm %s -o - | \ +// RUN: FileCheck %s --check-prefix=IR-LSX +// RUN: %clang --target=loongarch64 -mno-lsx -S -emit-llvm %s -o - | \ +// RUN: FileCheck %s --check-prefix=IR-NOLSX +// RUN: %clang --target=loongarch64 -mlsx -mno-lsx -S -emit-llvm %s -o - | \ +// RUN: FileCheck %s --check-prefix=IR-NOLSX +// RUN: %clang --target=loongarch64 -mno-lsx -mlsx -S -emit-llvm %s -o - | \ +// RUN: FileCheck %s --check-prefix=IR-LSX +// RUN: %clang --target=loongarch64 -mlsx -mno-lasx -S -emit-llvm %s -o - | \ +// RUN: FileCheck %s --check-prefix=IR-LSX +// RUN: %clang --target=loongarch64 -mno-lasx -mlsx -S -emit-llvm %s -o - | \ +// RUN: FileCheck %s --check-prefix=IR-LSX +// RUN: %clang --target=loongarch64 -mno-lsx -mno-lasx -S -emit-llvm %s -o - | \ +// RUN: FileCheck %s --check-prefix=IR-NOLSX + +// CC1-LSX: "-target-feature" "+lsx" +// CC1-NOLSX: "-target-feature" "-lsx" + +// IR-LSX: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}+lsx{{(,.*)?}}" +// IR-NOLSX: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}-lsx{{(,.*)?}}" + +int foo(void){ + return 3; +} diff --git a/clang/test/Driver/loongarch-msimd.c b/clang/test/Driver/loongarch-msimd.c new file mode 100644 index 0000000000000000000000000000000000000000..cd463300c87470d939298a286c37ef84368fbcf0 --- /dev/null +++ b/clang/test/Driver/loongarch-msimd.c @@ -0,0 +1,95 @@ +/// Test -msimd options. 
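+///
+/// The matrix below exercises the intended precedence (an informal summary,
+/// not itself checked): -msimd= chooses the default SIMD level, and any
+/// explicit -m[no-]lsx / -m[no-]lasx flag takes precedence over it. For
+/// example, "clang --target=loongarch64 -msimd=lasx -c a.c" (illustrative
+/// invocation) enables both +lsx and +lasx.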
+ +/// COM: -msimd=none +// RUN: %clang --target=loongarch64 -mlasx -msimd=none -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=LSX,LASX +// RUN: %clang --target=loongarch64 -mlasx -mlsx -msimd=none -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=LSX,LASX + +// RUN: %clang --target=loongarch64 -msimd=none -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX +// RUN: %clang --target=loongarch64 -mlasx -mno-lasx -msimd=none -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX +// RUN: %clang --target=loongarch64 -mlasx -mno-lasx -mlsx -mno-lsx -msimd=none -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX +// RUN: %clang --target=loongarch64 -mlasx -mno-lasx -mno-lsx -msimd=none -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX +// RUN: %clang --target=loongarch64 -mlsx -mno-lsx -msimd=none -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX +// RUN: %clang --target=loongarch64 -mno-lasx -msimd=none -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX +// RUN: %clang --target=loongarch64 -mno-lasx -mlsx -mno-lsx -msimd=none -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX +// RUN: %clang --target=loongarch64 -mno-lasx -mno-lsx -msimd=none -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX +// RUN: %clang --target=loongarch64 -mno-lsx -msimd=none -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX + +// RUN: %clang --target=loongarch64 -mlasx -mno-lasx -mlsx -msimd=none -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=LSX,NOLASX +// RUN: %clang --target=loongarch64 -mno-lasx -mlsx -msimd=none -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=LSX,NOLASX +// RUN: %clang --target=loongarch64 -mlsx -msimd=none -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=LSX,NOLASX + + +/// COM: -msimd=lsx +// RUN: %clang --target=loongarch64 -mlasx -msimd=lsx -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=LSX,LASX +// RUN: %clang --target=loongarch64 -mlasx -mlsx -msimd=lsx -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=LSX,LASX + +// RUN: %clang --target=loongarch64 -mlasx -mno-lasx -mno-lsx -msimd=lsx -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX +// RUN: %clang --target=loongarch64 -mlsx -mno-lsx -msimd=lsx -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX +// RUN: %clang --target=loongarch64 -mno-lasx -mlsx -mno-lsx -msimd=lsx -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX +// RUN: %clang --target=loongarch64 -mno-lasx -mno-lsx -msimd=lsx -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX +// RUN: %clang --target=loongarch64 -mno-lsx -msimd=lsx -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX +// RUN: %clang --target=loongarch64 -mlasx -mno-lasx -mlsx -mno-lsx -msimd=lsx -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX + +// RUN: %clang --target=loongarch64 -msimd=lsx -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=LSX,NOLASX +// RUN: %clang --target=loongarch64 -mlasx -mno-lasx -msimd=lsx -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s 
--check-prefixes=LSX,NOLASX +// RUN: %clang --target=loongarch64 -mlsx -msimd=lsx -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=LSX,NOLASX +// RUN: %clang --target=loongarch64 -mno-lasx -msimd=lsx -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=LSX,NOLASX +// RUN: %clang --target=loongarch64 -mno-lasx -mlsx -msimd=lsx -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=LSX,NOLASX + + +/// COM: -msimd=lasx +// RUN: %clang --target=loongarch64 -msimd=lasx -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=LSX,LASX +// RUN: %clang --target=loongarch64 -mlasx -msimd=lasx -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=LSX,LASX +// RUN: %clang --target=loongarch64 -mlasx -mlsx -msimd=lasx -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=LSX,LASX +// RUN: %clang --target=loongarch64 -mlsx -msimd=lasx -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=LSX,LASX + +// RUN: %clang --target=loongarch64 -mlasx -mno-lasx -msimd=lasx -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX +// RUN: %clang --target=loongarch64 -mno-lasx -msimd=lasx -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=NOLSX,NOLASX + +// RUN: %clang --target=loongarch64 -mlasx -mno-lasx -mlsx -msimd=lasx -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=LSX,NOLASX +// RUN: %clang --target=loongarch64 -mno-lasx -mlsx -msimd=lasx -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=LSX,NOLASX +// RUN: %clang --target=loongarch64 -mlasx -mno-lasx -mlsx -msimd=lsx -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefixes=LSX,NOLASX + + +// NOLSX-NOT: "-target-feature" "+lsx" +// NOLASX-NOT: "-target-feature" "+lasx" +// LSX-DAG: "-target-feature" "+lsx" +// LASX-DAG: "-target-feature" "+lasx" +// NOLSX-NOT: "-target-feature" "+lsx" +// NOLASX-NOT: "-target-feature" "+lasx" diff --git a/clang/test/Driver/mcmodel.c b/clang/test/Driver/mcmodel.c index 2c74c966744af17a798f7d3fa6e778aad99e3610..1565a8768a2ef7e91c7c70b31f555d7242b3bbbe 100644 --- a/clang/test/Driver/mcmodel.c +++ b/clang/test/Driver/mcmodel.c @@ -6,6 +6,14 @@ // RUN: %clang -target powerpc-unknown-aix -### -S -mcmodel=medium %s 2> %t.log // RUN: FileCheck --check-prefix=AIX-MCMEDIUM-OVERRIDE %s < %t.log // RUN: not %clang -c -mcmodel=lager %s 2>&1 | FileCheck --check-prefix=INVALID %s +// RUN: %clang --target=loongarch64 -### -S -mcmodel=normal %s 2>&1 | FileCheck --check-prefix=SMALL %s +// RUN: %clang --target=loongarch64 -### -S -mcmodel=medium %s 2>&1 | FileCheck --check-prefix=MEDIUM %s +// RUN: %clang --target=loongarch64 -### -S -mcmodel=extreme %s 2>&1 | FileCheck --check-prefix=LARGE %s +// RUN: not %clang -c --target=loongarch64 -mcmodel=tiny %s 2>&1 | FileCheck --check-prefix=ERR-LOONGARCH64-TINY %s +// RUN: not %clang -c --target=loongarch64 -mcmodel=small %s 2>&1 | FileCheck --check-prefix=ERR-LOONGARCH64-SMALL %s +// RUN: not %clang -c --target=loongarch64 -mcmodel=kernel %s 2>&1 | FileCheck --check-prefix=ERR-LOONGARCH64-KERNEL %s +// RUN: not %clang -c --target=loongarch64 -mcmodel=large %s 2>&1 | FileCheck --check-prefix=ERR-LOONGARCH64-LARGE %s +// RUN: not %clang -c --target=loongarch64 -mcmodel=extreme -fplt %s 2>&1 | FileCheck --check-prefix=ERR-LOONGARCH64-PLT-EXTREME %s // TINY: "-mcmodel=tiny" // SMALL: "-mcmodel=small" @@ -15,3 +23,10 @@ // AIX-MCMEDIUM-OVERRIDE: "-mcmodel=large" // INVALID: error: invalid 
argument 'lager' to -mcmodel= + +// ERR-LOONGARCH64-TINY: error: invalid argument 'tiny' to -mcmodel= +// ERR-LOONGARCH64-SMALL: error: invalid argument 'small' to -mcmodel= +// ERR-LOONGARCH64-KERNEL: error: invalid argument 'kernel' to -mcmodel= +// ERR-LOONGARCH64-LARGE: error: invalid argument 'large' to -mcmodel= + +// ERR-LOONGARCH64-PLT-EXTREME: error: invalid argument '-mcmodel=extreme' not allowed with '-fplt' diff --git a/clang/test/Preprocessor/init-loongarch.c b/clang/test/Preprocessor/init-loongarch.c index 4ef42a921ec0339cf465f7e5a5b057f898b285e7..154ad82e0f8c1fa4057c86235ec6037970c885f3 100644 --- a/clang/test/Preprocessor/init-loongarch.c +++ b/clang/test/Preprocessor/init-loongarch.c @@ -807,3 +807,41 @@ // ARCH-TUNE: #define __loongarch_arch "[[ARCH]]" // ARCH-TUNE: #define __loongarch_tune "[[TUNE]]" + +// RUN: %clang --target=loongarch64 -mlsx -x c -E -dM %s -o - \ +// RUN: | FileCheck --match-full-lines --check-prefix=MLSX %s +// RUN: %clang --target=loongarch64 -mno-lsx -mlsx -x c -E -dM %s -o - \ +// RUN: | FileCheck --match-full-lines --check-prefix=MLSX %s +// RUN: %clang --target=loongarch64 -mlsx -mno-lasx -x c -E -dM %s -o - \ +// RUN: | FileCheck --match-full-lines --check-prefix=MLSX %s +// RUN: %clang --target=loongarch64 -mno-lasx -mlsx -x c -E -dM %s -o - \ +// RUN: | FileCheck --match-full-lines --check-prefix=MLSX %s +// MLSX-NOT: #define __loongarch_asx +// MLSX: #define __loongarch_simd_width 128 +// MLSX: #define __loongarch_sx 1 + +// RUN: %clang --target=loongarch64 -mlasx -x c -E -dM %s -o - \ +// RUN: | FileCheck --match-full-lines --check-prefix=MLASX %s +// RUN: %clang --target=loongarch64 -mno-lasx -mlasx -x c -E -dM %s -o - \ +// RUN: | FileCheck --match-full-lines --check-prefix=MLASX %s +// RUN: %clang --target=loongarch64 -mlsx -mlasx -x c -E -dM %s -o - \ +// RUN: | FileCheck --match-full-lines --check-prefix=MLASX %s +// RUN: %clang --target=loongarch64 -mlasx -mlsx -x c -E -dM %s -o - \ +// RUN: | FileCheck --match-full-lines --check-prefix=MLASX %s +// MLASX: #define __loongarch_asx 1 +// MLASX: #define __loongarch_simd_width 256 +// MLASX: #define __loongarch_sx 1 + +// RUN: %clang --target=loongarch64 -mno-lsx -x c -E -dM %s -o - \ +// RUN: | FileCheck --match-full-lines --check-prefix=MNO-LSX %s +// RUN: %clang --target=loongarch64 -mlsx -mno-lsx -x c -E -dM %s -o - \ +// RUN: | FileCheck --match-full-lines --check-prefix=MNO-LSX %s +// RUN: %clang --target=loongarch64 -mno-lsx -mno-lasx -x c -E -dM %s -o - \ +// RUN: | FileCheck --match-full-lines --check-prefix=MNO-LSX %s +// RUN: %clang --target=loongarch64 -mno-lasx -mno-lsx -x c -E -dM %s -o - \ +// RUN: | FileCheck --match-full-lines --check-prefix=MNO-LSX %s +// RUN: %clang --target=loongarch64 -mno-lasx -x c -E -dM %s -o - \ +// RUN: | FileCheck --match-full-lines --check-prefix=MNO-LSX %s +// MNO-LSX-NOT: #define __loongarch_asx +// MNO-LSX-NOT: #define __loongarch_simd_width +// MNO-LSX-NOT: #define __loongarch_sx diff --git a/clang/test/Sema/attr-model.cpp b/clang/test/Sema/attr-model.cpp new file mode 100644 index 0000000000000000000000000000000000000000..898cc039398436f987b6146e52cbcabe43532675 --- /dev/null +++ b/clang/test/Sema/attr-model.cpp @@ -0,0 +1,64 @@ +// RUN: %clang_cc1 -triple aarch64 -verify=expected,aarch64 -fsyntax-only %s +// RUN: %clang_cc1 -triple loongarch64 -verify=expected,loongarch64 -fsyntax-only %s +// RUN: %clang_cc1 -triple mips64 -verify=expected,mips64 -fsyntax-only %s +// RUN: %clang_cc1 -triple powerpc64 -verify=expected,powerpc64 
-fsyntax-only %s +// RUN: %clang_cc1 -triple riscv64 -verify=expected,riscv64 -fsyntax-only %s +// RUN: %clang_cc1 -triple x86_64 -verify=expected,x86_64 -fsyntax-only %s + +#if defined(__loongarch__) && !__has_attribute(model) +#error "Should support model attribute" +#endif + +int a __attribute((model("tiny"))); // aarch64-warning {{unknown attribute 'model' ignored}} \ + // loongarch64-error {{code model 'tiny' is not supported on this target}} \ + // mips64-warning {{unknown attribute 'model' ignored}} \ + // powerpc64-warning {{unknown attribute 'model' ignored}} \ + // riscv64-warning {{unknown attribute 'model' ignored}} \ + // x86_64-warning {{unknown attribute 'model' ignored}} +int b __attribute((model("small"))); // aarch64-warning {{unknown attribute 'model' ignored}} \ + // loongarch64-error {{code model 'small' is not supported on this target}} \ + // mips64-warning {{unknown attribute 'model' ignored}} \ + // powerpc64-warning {{unknown attribute 'model' ignored}} \ + // riscv64-warning {{unknown attribute 'model' ignored}} \ + // x86_64-warning {{unknown attribute 'model' ignored}} +int c __attribute((model("normal"))); // aarch64-warning {{unknown attribute 'model' ignored}} \ + // mips64-warning {{unknown attribute 'model' ignored}} \ + // powerpc64-warning {{unknown attribute 'model' ignored}} \ + // riscv64-warning {{unknown attribute 'model' ignored}} \ + // x86_64-warning {{unknown attribute 'model' ignored}} +int d __attribute((model("kernel"))); // aarch64-warning {{unknown attribute 'model' ignored}} \ + // loongarch64-error {{code model 'kernel' is not supported on this target}} \ + // mips64-warning {{unknown attribute 'model' ignored}} \ + // powerpc64-warning {{unknown attribute 'model' ignored}} \ + // riscv64-warning {{unknown attribute 'model' ignored}} \ + // x86_64-warning {{unknown attribute 'model' ignored}} +int e __attribute((model("medium"))); // aarch64-warning {{unknown attribute 'model' ignored}} \ + // mips64-warning {{unknown attribute 'model' ignored}} \ + // powerpc64-warning {{unknown attribute 'model' ignored}} \ + // riscv64-warning {{unknown attribute 'model' ignored}} \ + // x86_64-warning {{unknown attribute 'model' ignored}} +int f __attribute((model("large"))); // aarch64-warning {{unknown attribute 'model' ignored}} \ + // loongarch64-error {{code model 'large' is not supported on this target}} \ + // mips64-warning {{unknown attribute 'model' ignored}} \ + // powerpc64-warning {{unknown attribute 'model' ignored}} \ + // riscv64-warning {{unknown attribute 'model' ignored}} \ + // x86_64-warning {{unknown attribute 'model' ignored}} +int g __attribute((model("extreme"))); // aarch64-warning {{unknown attribute 'model' ignored}} \ + // mips64-warning {{unknown attribute 'model' ignored}} \ + // powerpc64-warning {{unknown attribute 'model' ignored}} \ + // riscv64-warning {{unknown attribute 'model' ignored}} \ + // x86_64-warning {{unknown attribute 'model' ignored}} + +void __attribute((model("extreme"))) h() {} // aarch64-warning {{unknown attribute 'model' ignored}} \ + // loongarch64-error {{'model' attribute only applies to non-TLS global variables}} \ + // mips64-warning {{unknown attribute 'model' ignored}} \ + // powerpc64-warning {{unknown attribute 'model' ignored}} \ + // riscv64-warning {{unknown attribute 'model' ignored}} \ + // x86_64-warning {{unknown attribute 'model' ignored}} + +thread_local int i __attribute((model("extreme"))); // aarch64-warning {{unknown attribute 'model' ignored}} \ + // loongarch64-error 
{{'model' attribute only applies to non-TLS global variables}} \
+                     // mips64-warning {{unknown attribute 'model' ignored}} \
+                     // powerpc64-warning {{unknown attribute 'model' ignored}} \
+                     // riscv64-warning {{unknown attribute 'model' ignored}} \
+                     // x86_64-warning {{unknown attribute 'model' ignored}}
diff --git a/clang/utils/TableGen/ClangAttrEmitter.cpp b/clang/utils/TableGen/ClangAttrEmitter.cpp
index 9d1ec9bd9d8691db3f172a74c2d2f217f9614a43..777b8d17113127f3c623582283f7b2cd12e2e1f9 100644
--- a/clang/utils/TableGen/ClangAttrEmitter.cpp
+++ b/clang/utils/TableGen/ClangAttrEmitter.cpp
@@ -839,15 +839,25 @@ namespace {
   }

   class EnumArgument : public Argument {
-    std::string type;
+    std::string fullType;
+    StringRef shortType;
     std::vector<std::string> values, enums, uniques;
+    bool isExternal;

   public:
     EnumArgument(const Record &Arg, StringRef Attr)
-        : Argument(Arg, Attr), type(std::string(Arg.getValueAsString("Type"))),
-          values(Arg.getValueAsListOfStrings("Values")),
+        : Argument(Arg, Attr), values(Arg.getValueAsListOfStrings("Values")),
          enums(Arg.getValueAsListOfStrings("Enums")),
-          uniques(uniqueEnumsInOrder(enums)) {
+          uniques(uniqueEnumsInOrder(enums)),
+          isExternal(Arg.getValueAsBit("IsExternalType")) {
+      StringRef Type = Arg.getValueAsString("Type");
+      shortType = isExternal ? Type.rsplit("::").second : Type;
+      // If shortType didn't contain "::" at all, rsplit will give us an empty
+      // string.
+      if (shortType.empty())
+        shortType = Type;
+      fullType = isExternal ? Type.str() : (getAttrName() + "Attr::" + Type).str();
+
       // FIXME: Emit a proper error
       assert(!uniques.empty());
     }
@@ -855,7 +865,7 @@ namespace {
     bool isEnumArg() const override { return true; }

     void writeAccessors(raw_ostream &OS) const override {
-      OS << "  " << type << " get" << getUpperName() << "() const {\n";
+      OS << "  " << fullType << " get" << getUpperName() << "() const {\n";
       OS << "    return " << getLowerName() << ";\n";
       OS << "  }";
     }
@@ -871,30 +881,32 @@ namespace {
       OS << getLowerName() << "(" << getUpperName() << ")";
     }
     void writeCtorDefaultInitializers(raw_ostream &OS) const override {
-      OS << getLowerName() << "(" << type << "(0))";
+      OS << getLowerName() << "(" << fullType << "(0))";
     }
     void writeCtorParameters(raw_ostream &OS) const override {
-      OS << type << " " << getUpperName();
+      OS << fullType << " " << getUpperName();
     }
     void writeDeclarations(raw_ostream &OS) const override {
-      auto i = uniques.cbegin(), e = uniques.cend();
-      // The last one needs to not have a comma.
-      --e;
+      if (!isExternal) {
+        auto i = uniques.cbegin(), e = uniques.cend();
+        // The last one needs to not have a comma.
+        --e;
+
+        OS << "public:\n";
+        OS << "  enum " << shortType << " {\n";
+        for (; i != e; ++i)
+          OS << "    " << *i << ",\n";
+        OS << "    " << *e << "\n";
+        OS << "  };\n";
+      }

-      OS << "public:\n";
-      OS << "  enum " << type << " {\n";
-      for (; i != e; ++i)
-        OS << "    " << *i << ",\n";
-      OS << "    " << *e << "\n";
-      OS << "  };\n";
       OS << "private:\n";
-      OS << "  " << type << " " << getLowerName() << ";";
+      OS << "  " << fullType << " " << getLowerName() << ";";
     }

     void writePCHReadDecls(raw_ostream &OS) const override {
-      OS << "    " << getAttrName() << "Attr::" << type << " " << getLowerName()
-         << "(static_cast<" << getAttrName() << "Attr::" << type
-         << ">(Record.readInt()));\n";
+      OS << "    " << fullType << " " << getLowerName() << "(static_cast<"
+         << fullType << ">(Record.readInt()));\n";
     }

     void writePCHReadArgs(raw_ostream &OS) const override {
@@ -902,45 +914,50 @@
     }

     void writePCHWrite(raw_ostream &OS) const override {
-      OS << "Record.push_back(SA->get" << getUpperName() << "());\n";
+      OS << "Record.push_back(static_cast<uint64_t>(SA->get" << getUpperName()
+         << "()));\n";
     }

     void writeValue(raw_ostream &OS) const override {
       // FIXME: this isn't 100% correct -- some enum arguments require printing
       // as a string literal, while others require printing as an identifier.
       // Tablegen currently does not distinguish between the two forms.
-      OS << "\\\"\" << " << getAttrName() << "Attr::Convert" << type << "ToStr(get"
-         << getUpperName() << "()) << \"\\\"";
+      OS << "\\\"\" << " << getAttrName() << "Attr::Convert" << shortType
+         << "ToStr(get" << getUpperName() << "()) << \"\\\"";
     }

     void writeDump(raw_ostream &OS) const override {
       OS << "    switch(SA->get" << getUpperName() << "()) {\n";
       for (const auto &I : uniques) {
-        OS << "    case " << getAttrName() << "Attr::" << I << ":\n";
+        OS << "    case " << fullType << "::" << I << ":\n";
         OS << "      OS << \" " << I << "\";\n";
         OS << "      break;\n";
       }
+      if (isExternal) {
+        OS << "    default:\n";
+        OS << "      llvm_unreachable(\"Invalid attribute value\");\n";
+      }
       OS << "    }\n";
     }

     void writeConversion(raw_ostream &OS, bool Header) const {
       if (Header) {
-        OS << "  static bool ConvertStrTo" << type << "(StringRef Val, " << type
-           << " &Out);\n";
-        OS << "  static const char *Convert" << type << "ToStr(" << type
-           << " Val);\n";
+        OS << "  static bool ConvertStrTo" << shortType << "(StringRef Val, "
+           << fullType << " &Out);\n";
+        OS << "  static const char *Convert" << shortType << "ToStr("
+           << fullType << " Val);\n";
         return;
       }

-      OS << "bool " << getAttrName() << "Attr::ConvertStrTo" << type
-         << "(StringRef Val, " << type << " &Out) {\n";
-      OS << "  Optional<" << type << "> R = llvm::StringSwitch<Optional<" << type << ">>(Val)\n";
+      OS << "bool " << getAttrName() << "Attr::ConvertStrTo" << shortType
+         << "(StringRef Val, " << fullType << " &Out) {\n";
+      OS << "  Optional<" << fullType << "> "
+         << "R = llvm::StringSwitch<Optional<" << fullType << ">>(Val)\n";
       for (size_t I = 0; I < enums.size(); ++I) {
         OS << "    .Case(\"" << values[I] << "\", ";
-        OS << getAttrName() << "Attr::" << enums[I] << ")\n";
+        OS << fullType << "::" << enums[I] << ")\n";
       }
-      OS << "    .Default(Optional<" << type << ">());\n";
+      OS << "    .Default(Optional<" << fullType << ">());\n";
       OS << "  if (R) {\n";
       OS << "    Out = *R;\n      return true;\n    }\n";
       OS << "  return false;\n";
@@ -950,14 +967,17 @@
      // trivial because some enumeration values have multiple named
      // enumerators, such as type_visibility(internal) and
      // type_visibility(hidden) both mapping to TypeVisibilityAttr::Hidden.
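
To make the shortType/fullType split above concrete, the declarations below sketch what the emitted conversion helpers look like in the generated attribute class. The attribute name Foo, the enum name Kind, and the scope llvm::sys are hypothetical, chosen only to illustrate the two paths; the exact shape follows the OS << calls in this hunk:

    // isExternalType = 1, Type = "llvm::sys::Kind" (hypothetical):
    // shortType = "Kind" (from rsplit("::")), fullType = "llvm::sys::Kind".
    static bool ConvertStrToKind(StringRef Val, llvm::sys::Kind &Out);
    static const char *ConvertKindToStr(llvm::sys::Kind Val);

    // isExternalType = 0, Type = "KindType" on attribute Foo (hypothetical):
    // shortType = "KindType", fullType = "FooAttr::KindType", as before.
    static bool ConvertStrToKindType(StringRef Val, FooAttr::KindType &Out);
    static const char *ConvertKindTypeToStr(FooAttr::KindType Val);
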
-    OS << "const char *" << getAttrName() << "Attr::Convert" << type
-       << "ToStr(" << type << " Val) {\n"
+    OS << "const char *" << getAttrName() << "Attr::Convert" << shortType
+       << "ToStr(" << fullType << " Val) {\n"
        << "  switch(Val) {\n";
     SmallDenseSet<StringRef, 8> Uniques;
     for (size_t I = 0; I < enums.size(); ++I) {
       if (Uniques.insert(enums[I]).second)
-        OS << "  case " << getAttrName() << "Attr::" << enums[I]
-           << ": return \"" << values[I] << "\";\n";
+        OS << "  case " << fullType << "::" << enums[I] << ": return \""
+           << values[I] << "\";\n";
+    }
+    if (isExternal) {
+      OS << "  default: llvm_unreachable(\"Invalid attribute value\");\n";
     }
     OS << "  }\n"
        << "  llvm_unreachable(\"No enumerator with that value\");\n"
@@ -966,27 +986,36 @@ namespace {
   };

   class VariadicEnumArgument: public VariadicArgument {
-    std::string type, QualifiedTypeName;
+    std::string fullType;
+    StringRef shortType;
     std::vector<std::string> values, enums, uniques;
+    bool isExternal;

   protected:
     void writeValueImpl(raw_ostream &OS) const override {
       // FIXME: this isn't 100% correct -- some enum arguments require printing
       // as a string literal, while others require printing as an identifier.
       // Tablegen currently does not distinguish between the two forms.
-      OS << "    OS << \"\\\"\" << " << getAttrName() << "Attr::Convert" << type
-         << "ToStr(Val)" << "<< \"\\\"\";\n";
+      OS << "    OS << \"\\\"\" << " << getAttrName() << "Attr::Convert"
+         << shortType << "ToStr(Val)"
+         << "<< \"\\\"\";\n";
     }

   public:
     VariadicEnumArgument(const Record &Arg, StringRef Attr)
         : VariadicArgument(Arg, Attr, std::string(Arg.getValueAsString("Type"))),
-          type(std::string(Arg.getValueAsString("Type"))),
           values(Arg.getValueAsListOfStrings("Values")),
           enums(Arg.getValueAsListOfStrings("Enums")),
-          uniques(uniqueEnumsInOrder(enums)) {
-      QualifiedTypeName = getAttrName().str() + "Attr::" + type;
+          uniques(uniqueEnumsInOrder(enums)),
+          isExternal(Arg.getValueAsBit("IsExternalType")) {
+      StringRef Type = Arg.getValueAsString("Type");
+      shortType = isExternal ? Type.rsplit("::").second : Type;
+      // If shortType didn't contain "::" at all, rsplit will give us an empty
+      // string.
+      if (shortType.empty())
+        shortType = Type;
+      fullType = isExternal ? Type.str() : (getAttrName() + "Attr::" + Type).str();

       // FIXME: Emit a proper error
       assert(!uniques.empty());
@@ -995,16 +1024,18 @@ namespace {
     bool isVariadicEnumArg() const override { return true; }

     void writeDeclarations(raw_ostream &OS) const override {
-      auto i = uniques.cbegin(), e = uniques.cend();
-      // The last one needs to not have a comma.
-      --e;
-
-      OS << "public:\n";
-      OS << "  enum " << type << " {\n";
-      for (; i != e; ++i)
-        OS << "    " << *i << ",\n";
-      OS << "    " << *e << "\n";
-      OS << "  };\n";
+      if (!isExternal) {
+        auto i = uniques.cbegin(), e = uniques.cend();
+        // The last one needs to not have a comma.
+        --e;
+
+        OS << "public:\n";
+        OS << "  enum " << shortType << " {\n";
+        for (; i != e; ++i)
+          OS << "    " << *i << ",\n";
+        OS << "    " << *e << "\n";
+        OS << "  };\n";
+      }

       OS << "private:\n";
       VariadicArgument::writeDeclarations(OS);
@@ -1016,7 +1047,7 @@
          << getLowerName() << "_end(); I != E; ++I) {\n";
       OS << "      switch(*I) {\n";
       for (const auto &UI : uniques) {
-        OS << "    case " << getAttrName() << "Attr::" << UI << ":\n";
+        OS << "    case " << fullType << "::" << UI << ":\n";
         OS << "      OS << \" " << UI << "\";\n";
         OS << "      break;\n";
       }
@@ -1026,13 +1057,13 @@
     void writePCHReadDecls(raw_ostream &OS) const override {
       OS << "    unsigned " << getLowerName() << "Size = Record.readInt();\n";
-      OS << "    SmallVector<" << QualifiedTypeName << ", 4> " << getLowerName()
+      OS << "    SmallVector<" << fullType << ", 4> " << getLowerName()
          << ";\n";
       OS << "    " << getLowerName() << ".reserve(" << getLowerName()
          << "Size);\n";
       OS << "    for (unsigned i = " << getLowerName() << "Size; i; --i)\n";
-      OS << "      " << getLowerName() << ".push_back(" << "static_cast<"
-         << QualifiedTypeName << ">(Record.readInt()));\n";
+      OS << "      " << getLowerName() << ".push_back("
+         << "static_cast<" << fullType << ">(Record.readInt()));\n";
     }

     void writePCHWrite(raw_ostream &OS) const override {
@@ -1040,41 +1071,41 @@
       OS << "    for (" << getAttrName() << "Attr::" << getLowerName()
          << "_iterator i = SA->" << getLowerName() << "_begin(), e = SA->"
          << getLowerName() << "_end(); i != e; ++i)\n";
-      OS << "      " << WritePCHRecord(QualifiedTypeName, "(*i)");
+      OS << "      " << WritePCHRecord(fullType, "(*i)");
     }

     void writeConversion(raw_ostream &OS, bool Header) const {
       if (Header) {
-        OS << "  static bool ConvertStrTo" << type << "(StringRef Val, " << type
-           << " &Out);\n";
-        OS << "  static const char *Convert" << type << "ToStr(" << type
-           << " Val);\n";
+        OS << "  static bool ConvertStrTo" << shortType << "(StringRef Val, "
+           << fullType << " &Out);\n";
+        OS << "  static const char *Convert" << shortType << "ToStr("
+           << fullType << " Val);\n";
         return;
       }

-      OS << "bool " << getAttrName() << "Attr::ConvertStrTo" << type
+      OS << "bool " << getAttrName() << "Attr::ConvertStrTo" << shortType
          << "(StringRef Val, ";
-      OS << type << " &Out) {\n";
-      OS << "  Optional<" << type << "> R = llvm::StringSwitch<Optional<" << type << ">>(Val)\n";
+      OS << fullType << " &Out) {\n";
+      OS << "  Optional<" << fullType << "> R = llvm::StringSwitch<Optional<" << fullType << ">>(Val)\n";
       for (size_t I = 0; I < enums.size(); ++I) {
         OS << "    .Case(\"" << values[I] << "\", ";
-        OS << getAttrName() << "Attr::" << enums[I] << ")\n";
+        OS << fullType << "::" << enums[I] << ")\n";
       }
-      OS << "    .Default(Optional<" << type << ">());\n";
+      OS << "    .Default(Optional<" << fullType << ">());\n";
       OS << "  if (R) {\n";
       OS << "    Out = *R;\n      return true;\n    }\n";
       OS << "  return false;\n";
       OS << "}\n\n";

-      OS << "const char *" << getAttrName() << "Attr::Convert" << type
-         << "ToStr(" << type << " Val) {\n"
+      OS << "const char *" << getAttrName() << "Attr::Convert" << shortType
+         << "ToStr(" << fullType << " Val) {\n"
         << "  switch(Val) {\n";
       SmallDenseSet<StringRef, 8> Uniques;
       for (size_t I = 0; I < enums.size(); ++I) {
         if (Uniques.insert(enums[I]).second)
-          OS << "  case " << getAttrName() << "Attr::" << enums[I]
-             << ": return \"" << values[I] << "\";\n";
+          OS << "  case " << fullType << "::" << enums[I] << ": return \""
+             << values[I] << "\";\n";
       }
       OS << "  }\n"
         << "  llvm_unreachable(\"No enumerator with that value\");\n"
diff --git a/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake b/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake
index f085b0f10b58d024d0d9f0b6159987877e89a9b2..ad5b1399bbccf4126ab4aaf3fb55715c6530f5ea 100644 --- a/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake +++ b/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake @@ -38,16 +38,17 @@ else() endif() if(OS_NAME MATCHES "Linux") - set(ALL_FUZZER_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM32} ${ARM64} ${S390X}) + set(ALL_FUZZER_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM32} ${ARM64} ${S390X} + ${LOONGARCH64}) elseif (OS_NAME MATCHES "Windows") set(ALL_FUZZER_SUPPORTED_ARCH ${X86} ${X86_64}) elseif(OS_NAME MATCHES "Android|OHOS") - set(ALL_FUZZER_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM32} ${ARM64}) + set(ALL_FUZZER_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM32} ${ARM64} ${LOONGARCH64}) else() set(ALL_FUZZER_SUPPORTED_ARCH ${X86_64} ${ARM64}) endif() -set(ALL_GWP_ASAN_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM32} ${ARM64}) +set(ALL_GWP_ASAN_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM32} ${ARM64} ${LOONGARCH64}) # OHOS_LOCAL if(APPLE) set(ALL_LSAN_SUPPORTED_ARCH ${X86} ${X86_64} ${MIPS64} ${ARM64}) else() diff --git a/compiler-rt/lib/fuzzer/FuzzerTracePC.cpp b/compiler-rt/lib/fuzzer/FuzzerTracePC.cpp index f12f7aa61bc4a843abea6f96dcbddf5d139df42b..7f4e8ef91c447c2bdad588c3222ab958981a8bfc 100644 --- a/compiler-rt/lib/fuzzer/FuzzerTracePC.cpp +++ b/compiler-rt/lib/fuzzer/FuzzerTracePC.cpp @@ -149,8 +149,8 @@ inline ALWAYS_INLINE uintptr_t GetPreviousInstructionPc(uintptr_t PC) { ALWAYS_INLINE uintptr_t TracePC::GetNextInstructionPc(uintptr_t PC) { #if defined(__mips__) return PC + 8; -#elif defined(__powerpc__) || defined(__sparc__) || defined(__arm__) || \ - defined(__aarch64__) +#elif defined(__powerpc__) || defined(__sparc__) || defined(__arm__) || \ + defined(__aarch64__) || defined(__loongarch__) return PC + 4; #else return PC + 1; diff --git a/compiler-rt/lib/fuzzer/FuzzerUtil.cpp b/compiler-rt/lib/fuzzer/FuzzerUtil.cpp index aeab70f20c28e87bc70e8f4d5fdcb13337ff02af..71c3dc1ce7afd6c9d08f2de8a1e9dce7a29a81d8 100644 --- a/compiler-rt/lib/fuzzer/FuzzerUtil.cpp +++ b/compiler-rt/lib/fuzzer/FuzzerUtil.cpp @@ -21,6 +21,7 @@ #include #include #include +#include namespace fuzzer { @@ -234,4 +235,9 @@ uint64_t SimpleFastHash(const void *Data, size_t Size, uint64_t Initial) { return Res; } +size_t PageSize() { + static size_t PageSizeCached = sysconf(_SC_PAGESIZE); + return PageSizeCached; +} + } // namespace fuzzer diff --git a/compiler-rt/lib/fuzzer/FuzzerUtil.h b/compiler-rt/lib/fuzzer/FuzzerUtil.h index 71d49097e559a8091f7d6cdfedd98b41fc8a8848..5296e7784b3f58822a1a2aa61ef29200fe5cb2cb 100644 --- a/compiler-rt/lib/fuzzer/FuzzerUtil.h +++ b/compiler-rt/lib/fuzzer/FuzzerUtil.h @@ -94,7 +94,8 @@ inline size_t Log(size_t X) { return static_cast((sizeof(unsigned long long) * 8) - Clzll(X) - 1); } -inline size_t PageSize() { return 4096; } +size_t PageSize(); + inline uint8_t *RoundUpByPage(uint8_t *P) { uintptr_t X = reinterpret_cast(P); size_t Mask = PageSize() - 1; diff --git a/compiler-rt/lib/gwp_asan/common.h b/compiler-rt/lib/gwp_asan/common.h index 6b238ad9ecbdce72fe6e02b20acdda29fca006e8..aa06460176aeb968df12136a55bff71274be189c 100644 --- a/compiler-rt/lib/gwp_asan/common.h +++ b/compiler-rt/lib/gwp_asan/common.h @@ -176,6 +176,13 @@ static_assert(sizeof(AllocatorState) == 32, ""); static_assert(offsetof(AllocatorState, FailureAddress) == 28, ""); static_assert(sizeof(AllocationMetadata) == 560, ""); static_assert(offsetof(AllocationMetadata, IsDeallocated) == 552, ""); +// OHOS_LOCAL begin +#elif defined(__loongarch__) +static_assert(sizeof(AllocatorState) == 56, ""); 
+static_assert(offsetof(AllocatorState, FailureAddress) == 48, ""); +static_assert(sizeof(AllocationMetadata) == 568, ""); +static_assert(offsetof(AllocationMetadata, IsDeallocated) == 560, ""); +// OHOS_LOCAL end #endif // defined($ARCHITECTURE) } // namespace gwp_asan diff --git a/compiler-rt/lib/gwp_asan/tests/alignment.cpp b/compiler-rt/lib/gwp_asan/tests/alignment.cpp index 9f150467c79af3435ed9ceb65cdec55788eb657c..b7013818734c0d4a712b602f60fb401b3d456efe 100644 --- a/compiler-rt/lib/gwp_asan/tests/alignment.cpp +++ b/compiler-rt/lib/gwp_asan/tests/alignment.cpp @@ -24,6 +24,7 @@ public: } }; +// FIXME: loongarch is 16k. // Global assumptions for these tests: // 1. Page size is 0x1000. // 2. All tests assume a slot is multipage, between 0x4000 - 0x8000. While we diff --git a/compiler-rt/lib/gwp_asan/tests/basic.cpp b/compiler-rt/lib/gwp_asan/tests/basic.cpp index 88e7ed14a5c2f775a6618343ff6f7bb1795a700c..e9f2b6e5aa852db9fff485ca692f4979aef06f98 100644 --- a/compiler-rt/lib/gwp_asan/tests/basic.cpp +++ b/compiler-rt/lib/gwp_asan/tests/basic.cpp @@ -65,11 +65,17 @@ TEST_F(DefaultGuardedPoolAllocator, NonPowerOfTwoAlignment) { // Added multi-page slots? You'll need to expand this test. TEST_F(DefaultGuardedPoolAllocator, TooBigForSinglePageSlots) { - EXPECT_EQ(nullptr, GPA.allocate(0x1001, 0)); - EXPECT_EQ(nullptr, GPA.allocate(0x1001, 1)); - EXPECT_EQ(nullptr, GPA.allocate(0x1001, 0x1000)); - EXPECT_EQ(nullptr, GPA.allocate(1, 0x2000)); - EXPECT_EQ(nullptr, GPA.allocate(0, 0x2000)); + // OHOS_LOCAL begin + size_t PageSize = 0x1000; +#if defined(__loongarch__) + PageSize = 0x4000; +#endif + EXPECT_EQ(nullptr, GPA.allocate(PageSize + 0x1, 0)); + EXPECT_EQ(nullptr, GPA.allocate(PageSize + 0x1, 1)); + EXPECT_EQ(nullptr, GPA.allocate(PageSize + 0x1, PageSize)); + EXPECT_EQ(nullptr, GPA.allocate(1, PageSize + 0x1000)); + EXPECT_EQ(nullptr, GPA.allocate(0, PageSize + 0x1000)); + // OHOS_LOCAL end } TEST_F(CustomGuardedPoolAllocator, AllocAllSlots) { diff --git a/compiler-rt/lib/gwp_asan/tests/crash_handler_api.cpp b/compiler-rt/lib/gwp_asan/tests/crash_handler_api.cpp index 4cdb5694842f9069e19219f9792f866c2befb401..bfa9a1047c18b08927af18dd52f9555db4799403 100644 --- a/compiler-rt/lib/gwp_asan/tests/crash_handler_api.cpp +++ b/compiler-rt/lib/gwp_asan/tests/crash_handler_api.cpp @@ -42,6 +42,7 @@ protected: State.GuardedPagePool = 0x2000; State.GuardedPagePoolEnd = 0xb000; State.MaxSimultaneousAllocations = 4; // 0x3000, 0x5000, 0x7000, 0x9000. + // FIXME: loongarch is 16k. State.PageSize = 0x1000; } diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp index c23ffafa58f40876b374f9c467ad7961e3f4fa78..22e50cdcc24a9c5df72bebdea5496069028ad921 100644 --- a/lld/ELF/Arch/LoongArch.cpp +++ b/lld/ELF/Arch/LoongArch.cpp @@ -82,90 +82,33 @@ static uint64_t getLoongArchPage(uint64_t p) { static uint32_t lo12(uint32_t val) { return val & 0xfff; } // Calculate the adjusted page delta between dest and PC. 
-uint64_t elf::getLoongArchPageDelta(uint64_t dest, uint64_t pc) {
-  // Consider the large code model access pattern, of which the smaller code
-  // models' access patterns are a subset:
-  //
-  //  pcalau12i       U, %foo_hi20(sym)        ; b in [-0x80000, 0x7ffff]
-  //  addi.d          T, zero, %foo_lo12(sym)  ; a in [-0x800, 0x7ff]
-  //  lu32i.d         T, %foo64_lo20(sym)      ; c in [-0x80000, 0x7ffff]
-  //  lu52i.d         T, T, %foo64_hi12(sym)   ; d in [-0x800, 0x7ff]
-  //  {ldx,stx,add}.* dest, U, T
-  //
-  // Let page(pc) = 0xRRR'QQQQQ'PPPPP'000 and dest = 0xZZZ'YYYYY'XXXXX'AAA,
-  // with RQ, P, ZY, X and A representing the respective bitfields as unsigned
-  // integers. We have:
-  //
-  //     page(dest) = 0xZZZ'YYYYY'XXXXX'000
-  //   - page(pc)   = 0xRRR'QQQQQ'PPPPP'000
-  //   ----------------------------------
-  //                  0xddd'ccccc'bbbbb'000
-  //
-  // Now consider the above pattern's actual effects:
-  //
-  //    page(pc)                     0xRRR'QQQQQ'PPPPP'000
-  //    pcalau12i                  + 0xiii'iiiii'bbbbb'000
-  //    addi                       + 0xjjj'jjjjj'kkkkk'AAA
-  //    lu32i.d & lu52i.d          + 0xddd'ccccc'00000'000
-  //    --------------------------------------------------
-  //    dest = U + T
-  //         = ((RQ<<32) + (P<<12) + i + (b<<12)) + (j + k + A + (cd<<32))
-  //         = (((RQ+cd)<<32) + i + j) + (((P+b)<<12) + k) + A
-  //         = (ZY<<32)      + (X<<12)     + A
-  //
-  //    ZY<<32 = (RQ<<32)+(cd<<32)+i+j, X<<12 = (P<<12)+(b<<12)+k
-  //    cd<<32 = (ZY<<32)-(RQ<<32)-i-j, b<<12 = (X<<12)-(P<<12)-k
-  //
-  // where i and k are terms representing the effect of b's and A's sign
-  // extension respectively.
-  //
-  //    i = signed b < 0 ? -0x10000'0000 : 0
-  //    k = signed A < 0 ? -0x1000 : 0
-  //
-  // The j term is a bit complex: it represents the higher half of
-  // sign-extended bits from A that are effectively lost if i == 0 but k != 0,
-  // due to overwriting by lu32i.d & lu52i.d.
-  //
-  //    j = signed A < 0 && signed b >= 0 ? 0x10000'0000 : 0
-  //
-  // The actual effect of the instruction sequence before the final addition,
-  // i.e. our desired result value, is thus:
-  //
-  //    result = (cd<<32) + (b<<12)
-  //           = (ZY<<32)-(RQ<<32)-i-j + (X<<12)-(P<<12)-k
-  //           = ((ZY<<32)+(X<<12)) - ((RQ<<32)+(P<<12)) - i - j - k
-  //           = page(dest) - page(pc) - i - j - k
-  //
-  // when signed A >= 0 && signed b >= 0:
-  //
-  //    i = j = k = 0
-  //    result = page(dest) - page(pc)
-  //
-  // when signed A >= 0 && signed b < 0:
-  //
-  //    i = -0x10000'0000, j = k = 0
-  //    result = page(dest) - page(pc) + 0x10000'0000
-  //
-  // when signed A < 0 && signed b >= 0:
-  //
-  //    i = 0, j = 0x10000'0000, k = -0x1000
-  //    result = page(dest) - page(pc) - 0x10000'0000 + 0x1000
-  //
-  // when signed A < 0 && signed b < 0:
-  //
-  //    i = -0x10000'0000, j = 0, k = -0x1000
-  //    result = page(dest) - page(pc) + 0x1000
-  uint64_t result = getLoongArchPage(dest) - getLoongArchPage(pc);
-  bool negativeA = lo12(dest) > 0x7ff;
-  bool negativeB = (result & 0x8000'0000) != 0;
-
-  if (negativeA)
-    result += 0x1000;
-  if (negativeA && !negativeB)
-    result -= 0x10000'0000;
-  else if (!negativeA && negativeB)
-    result += 0x10000'0000;
-
+uint64_t elf::getLoongArchPageDelta(uint64_t dest, uint64_t pc, RelType type) {
+  // Note that if the sequence being relocated is `pcalau12i + addi.d + lu32i.d
+  // + lu52i.d`, they must be adjacent so that we can infer the PC of
+  // `pcalau12i` when calculating the page delta for the other two instructions
+  // (lu32i.d and lu52i.d). Compensating for all the sign extensions is a bit
+  // complicated. Just use the psABI-recommended algorithm.
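+  //
+  // A worked example (values from the extreme16 case in the
+  // loongarch-pc-aligned.s test updated below): dest = 0x12344888 and
+  // pcalau12i PC = 0x12345678. page(dest) - page(pc) = -0x1000; dest & 0x800
+  // is set, so add 0x1000 - 0x1'0000'0000, giving 0xffff'ffff'0000'0000; bit
+  // 31 of that is clear, so there is no final adjustment. The fields are then
+  // %pc_hi20 = 0, %pc64_lo20 = -1, %pc64_hi12 = -1, matching the test.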
+  uint64_t pcalau12i_pc;
+  switch (type) {
+  case R_LARCH_PCALA64_LO20:
+  case R_LARCH_GOT64_PC_LO20:
+  case R_LARCH_TLS_IE64_PC_LO20:
+    pcalau12i_pc = pc - 8;
+    break;
+  case R_LARCH_PCALA64_HI12:
+  case R_LARCH_GOT64_PC_HI12:
+  case R_LARCH_TLS_IE64_PC_HI12:
+    pcalau12i_pc = pc - 12;
+    break;
+  default:
+    pcalau12i_pc = pc;
+    break;
+  }
+  uint64_t result = getLoongArchPage(dest) - getLoongArchPage(pcalau12i_pc);
+  if (dest & 0x800)
+    result += 0x1000 - 0x1'0000'0000;
+  if (result & 0x8000'0000)
+    result += 0x1'0000'0000;
   return result;
 }

@@ -462,6 +405,7 @@ RelExpr LoongArch::getRelExpr(const RelType type, const Symbol &s,
   case R_LARCH_B16:
   case R_LARCH_B21:
   case R_LARCH_B26:
+  case R_LARCH_CALL36:
     return R_PLT_PC;
   case R_LARCH_TLS_IE_PC_HI20:
   case R_LARCH_TLS_IE64_PC_LO20:
@@ -591,6 +535,25 @@ void LoongArch::relocate(uint8_t *loc, const Relocation &rel,
     write32le(loc, setD10k16(read32le(loc), val >> 2));
     return;

+  case R_LARCH_CALL36: {
+    // This relocation is designed for adjacent pcaddu18i+jirl pairs that
+    // are patched together. Because of the sign extension of these insns'
+    // immediate fields, the relocation range is [-128G - 0x20000, +128G -
+    // 0x20000), and the offset must be 4-byte aligned.
+    if (((int64_t)val + 0x20000) != llvm::SignExtend64(val + 0x20000, 38))
+      reportRangeError(loc, rel, Twine(val), llvm::minIntN(38) - 0x20000,
+                       llvm::maxIntN(38) - 0x20000);
+    checkAlignment(loc, val, 4, rel);
+    // Since jirl sign-extends its offset immediate, add (1 << 17) to the
+    // original val to get the correct hi20.
+    uint32_t hi20 = extractBits(val + (1 << 17), 37, 18);
+    // Despite the name, the lower part is actually 18 bits; since it is
+    // 4-byte aligned, only 16 bits are stored.
+    uint32_t lo16 = extractBits(val, 17, 2);
+    write32le(loc, setJ20(read32le(loc), hi20));
+    write32le(loc + 4, setK16(read32le(loc + 4), lo16));
+    return;
+  }
+
   // Relocs intended for `addi`, `ld` or `st`.
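
As a cross-check of the R_LARCH_CALL36 case above, this self-contained sketch (plain C++ for illustration, not lld code) reproduces the hi20/lo16 split for the EXE1 scenario of the new loongarch-call36.s test below, where the pcaddu18i sits at pc = 0x20010 and foo is at 0x60020:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint64_t pc = 0x20010, dest = 0x60020;
      uint64_t val = dest - pc; // 0x40010
      // Round by (1 << 17) because jirl sign-extends its 18-bit byte offset.
      uint32_t hi20 = (val + (1 << 17)) >> 18; // 1  -> pcaddu18i $t0, 1
      uint32_t lo16 = (val >> 2) & 0xffff;     // 4  -> jirl byte offset 16
      // Replay the pair: pcaddu18i adds sext(hi20) << 18 to pc, then jirl
      // adds the sign-extended 18-bit byte offset (lo16 << 2).
      int64_t t0 = (int64_t)pc + (((int32_t)(hi20 << 12) >> 12)) * (int64_t)(1 << 18);
      int64_t off = (int32_t)(lo16 << 16) >> 14;
      assert((uint64_t)(t0 + off) == dest); // lands exactly on foo
      return 0;
    }
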
case R_LARCH_PCALA_LO12: // We have to again inspect the insn word to handle the R_LARCH_PCALA_LO12 diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp index 1420d8ce4e58ad9cfcc1b1689cf4cf717a62e53f..fe0d77a31bb4cb83d7924161352f0bd5fc3dcca1 100644 --- a/lld/ELF/InputSection.cpp +++ b/lld/ELF/InputSection.cpp @@ -665,8 +665,8 @@ uint64_t InputSectionBase::getRelocTargetVA(const InputFile *file, RelType type, return sym.getGotVA() + a - p; case R_LOONGARCH_GOT_PAGE_PC: if (sym.needsTlsGd) - return getLoongArchPageDelta(in.got->getGlobalDynAddr(sym) + a, p); - return getLoongArchPageDelta(sym.getGotVA() + a, p); + return getLoongArchPageDelta(in.got->getGlobalDynAddr(sym) + a, p, type); + return getLoongArchPageDelta(sym.getGotVA() + a, p, type); case R_MIPS_GOTREL: return sym.getVA(a) - in.mipsGot->getGp(file); case R_MIPS_GOT_GP: @@ -716,7 +716,7 @@ uint64_t InputSectionBase::getRelocTargetVA(const InputFile *file, RelType type, return 0; } case R_LOONGARCH_PAGE_PC: - return getLoongArchPageDelta(sym.getVA(a), p); + return getLoongArchPageDelta(sym.getVA(a), p, type); case R_PC: case R_ARM_PCA: { uint64_t dest; @@ -751,7 +751,7 @@ uint64_t InputSectionBase::getRelocTargetVA(const InputFile *file, RelType type, case R_PPC64_CALL_PLT: return sym.getPltVA() + a - p; case R_LOONGARCH_PLT_PAGE_PC: - return getLoongArchPageDelta(sym.getPltVA() + a, p); + return getLoongArchPageDelta(sym.getPltVA() + a, p, type); case R_PLT_GOTPLT: return sym.getPltVA() + a - in.gotPlt->getVA(); case R_PPC32_PLTREL: @@ -813,7 +813,7 @@ uint64_t InputSectionBase::getRelocTargetVA(const InputFile *file, RelType type, case R_TLSGD_PC: return in.got->getGlobalDynAddr(sym) + a - p; case R_LOONGARCH_TLSGD_PAGE_PC: - return getLoongArchPageDelta(in.got->getGlobalDynAddr(sym) + a, p); + return getLoongArchPageDelta(in.got->getGlobalDynAddr(sym) + a, p, type); case R_TLSLD_GOTPLT: return in.got->getVA() + in.got->getTlsIndexOff() + a - in.gotPlt->getVA(); case R_TLSLD_GOT: diff --git a/lld/ELF/Target.h b/lld/ELF/Target.h index 27ef93e00b8b51f2a00c9f0848591d87b136410d..8f866f09467c80f4f3382bbec072866bccac701a 100644 --- a/lld/ELF/Target.h +++ b/lld/ELF/Target.h @@ -230,7 +230,7 @@ void writePrefixedInstruction(uint8_t *loc, uint64_t insn); void addPPC64SaveRestore(); uint64_t getPPC64TocBase(); uint64_t getAArch64Page(uint64_t expr); -uint64_t getLoongArchPageDelta(uint64_t dest, uint64_t pc); +uint64_t getLoongArchPageDelta(uint64_t dest, uint64_t pc, RelType type); void riscvFinalizeRelax(int passes); class AArch64Relaxer { diff --git a/lld/test/ELF/loongarch-call36.s b/lld/test/ELF/loongarch-call36.s new file mode 100644 index 0000000000000000000000000000000000000000..3b1e5c01ebacfaa19372c07c11254e9fea4ef279 --- /dev/null +++ b/lld/test/ELF/loongarch-call36.s @@ -0,0 +1,69 @@ +# REQUIRES: loongarch + +# RUN: rm -rf %t && split-file %s %t +# RUN: llvm-mc --filetype=obj --triple=loongarch64-unknown-elf %t/a.s -o %t/a.o + +# RUN: ld.lld %t/a.o --section-start=.text=0x20010 --section-start=.sec.foo=0x60020 -o %t/exe1 +# RUN: llvm-objdump --no-show-raw-insn -d %t/exe1 | FileCheck --match-full-lines %s --check-prefix=EXE1 +## hi20 = target - pc + (1 << 17) >> 18 = 0x60020 - 0x20010 + 0x20000 >> 18 = 1 +## lo18 = target - pc & (1 << 18) - 1 = 0x60020 - 0x20010 & 0x3ffff = 16 +# EXE1: 20010: pcaddu18i $t0, 1 +# EXE1-NEXT: 20014: jirl $zero, $t0, 16 + +# RUN: ld.lld %t/a.o --section-start=.text=0x20010 --section-start=.sec.foo=0x40020 -o %t/exe2 +# RUN: llvm-objdump --no-show-raw-insn -d %t/exe2 | FileCheck 
--match-full-lines %s --check-prefix=EXE2
+## hi20 = target - pc + (1 << 17) >> 18 = 0x40020 - 0x20010 + 0x20000 >> 18 = 1
+## lo18 = target - pc & (1 << 18) - 1 = 0x40020 - 0x20010 & 0x3ffff = -131056
+# EXE2: 20010: pcaddu18i $t0, 1
+# EXE2-NEXT: 20014: jirl $zero, $t0, -131056
+
+# RUN: ld.lld %t/a.o -shared -T %t/a.t -o %t/a.so
+# RUN: llvm-readelf -x .got.plt %t/a.so | FileCheck --check-prefix=GOTPLT %s
+# RUN: llvm-objdump -d --no-show-raw-insn %t/a.so | FileCheck --check-prefix=SO %s
+## PLT should be present in this case.
+# SO: Disassembly of section .plt:
+# SO: <.plt>:
+## foo@plt:
+# SO: 1234520: pcaddu12i $t3, 64{{$}}
+# SO-NEXT: ld.d $t3, $t3, 544{{$}}
+# SO-NEXT: jirl $t1, $t3, 0
+# SO-NEXT: nop
+
+# SO: Disassembly of section .text:
+# SO: <_start>:
+## hi20 = foo@plt - pc + (1 << 17) >> 18 = 0x1234520 - 0x1274670 + 0x20000 >> 18 = -1
+## lo18 = foo@plt - pc & (1 << 18) - 1 = 0x1234520 - 0x1274670 & 0x3ffff = -336
+# SO-NEXT: pcaddu18i $t0, -1{{$}}
+# SO-NEXT: jirl $zero, $t0, -336{{$}}
+
+# GOTPLT: section '.got.plt':
+# GOTPLT-NEXT: 0x01274730 00000000 00000000 00000000 00000000
+# GOTPLT-NEXT: 0x01274740 00452301 00000000
+
+# RUN: not ld.lld %t/a.o --section-start=.text=0x20000 --section-start=.sec.foo=0x2000020000 -o /dev/null 2>&1 | \
+# RUN:   FileCheck -DFILE=%t/a.o --check-prefix=ERROR-RANGE %s
+# ERROR-RANGE: error: [[FILE]]:(.text+0x0): relocation R_LARCH_CALL36 out of range: 137438953472 is not in [-137439084544, 137438822399]; references foo
+
+## Impossible case in reality because all LoongArch instructions have a fixed length of 4 bytes.
+# RUN: not ld.lld %t/a.o --section-start=.text=0x20000 --section-start=.sec.foo=0x40001 -o /dev/null 2>&1 | \
+# RUN:   FileCheck -DFILE=%t/a.o --check-prefix=ERROR-ALIGN %s
+# ERROR-ALIGN: error: [[FILE]]:(.text+0x0): improper alignment for relocation R_LARCH_CALL36: 0x20001 is not aligned to 4 bytes
+
+#--- a.t
+SECTIONS {
+ .plt   0x1234500: { *(.plt) }
+ .text  0x1274670: { *(.text) }
+}
+
+#--- a.s
+.text
+.global _start
+_start:
+  .reloc ., R_LARCH_CALL36, foo
+  pcaddu18i $t0, 0
+  jirl $zero, $t0, 0
+
+.section .sec.foo,"awx"
+.global foo
+foo:
+  ret
diff --git a/lld/test/ELF/loongarch-pc-aligned.s b/lld/test/ELF/loongarch-pc-aligned.s
index 9df3492d18772ba59901e1351b9c669090a8581b..0405961e5f74ec236cf58b6acb2b5c66218173c7 100644
--- a/lld/test/ELF/loongarch-pc-aligned.s
+++ b/lld/test/ELF/loongarch-pc-aligned.s
@@ -75,8 +75,8 @@
 ## %pc64_hi12 = 0x444 = 1092
 # RUN: ld.lld %t/extreme.o --section-start=.rodata=0x4443333334567111 --section-start=.text=0x0000000012345678 -o %t/extreme0
 # RUN: llvm-objdump -d --no-show-raw-insn %t/extreme0 | FileCheck %s --check-prefix=EXTREME0
-# EXTREME0:      addi.d $t0, $zero, 273
-# EXTREME0-NEXT: pcalau12i $t1, 139810
+# EXTREME0:      pcalau12i $t1, 139810
+# EXTREME0-NEXT: addi.d $t0, $zero, 273
 # EXTREME0-NEXT: lu32i.d $t0, 209715
 # EXTREME0-NEXT: lu52i.d $t0, $t0, 1092

@@ -87,8 +87,8 @@
 ## %pc64_hi12 = 0x444 = 1092
 # RUN: ld.lld %t/extreme.o --section-start=.rodata=0x4443333334567888 --section-start=.text=0x0000000012345678 -o %t/extreme1
 # RUN: llvm-objdump -d --no-show-raw-insn %t/extreme1 | FileCheck %s --check-prefix=EXTREME1
-# EXTREME1:      addi.d $t0, $zero, -1912
-# EXTREME1-NEXT: pcalau12i $t1, 139811
+# EXTREME1:      pcalau12i $t1, 139811
+# EXTREME1-NEXT: addi.d $t0, $zero, -1912
 # EXTREME1-NEXT: lu32i.d $t0, 209714
 # EXTREME1-NEXT: lu52i.d $t0, $t0, 1092

@@ -99,8 +99,8 @@
 ## %pc64_hi12 = 0x444 = 1092
 # RUN: ld.lld %t/extreme.o --section-start=.rodata=0x44433333abcde111
--section-start=.text=0x0000000012345678 -o %t/extreme2 # RUN: llvm-objdump -d --no-show-raw-insn %t/extreme2 | FileCheck %s --check-prefix=EXTREME2 -# EXTREME2: addi.d $t0, $zero, 273 -# EXTREME2-NEXT: pcalau12i $t1, -419431 +# EXTREME2: pcalau12i $t1, -419431 +# EXTREME2-NEXT: addi.d $t0, $zero, 273 # EXTREME2-NEXT: lu32i.d $t0, 209716 # EXTREME2-NEXT: lu52i.d $t0, $t0, 1092 @@ -111,8 +111,8 @@ ## %pc64_hi12 = 0x444 = 1092 # RUN: ld.lld %t/extreme.o --section-start=.rodata=0x44433333abcde888 --section-start=.text=0x0000000012345678 -o %t/extreme3 # RUN: llvm-objdump -d --no-show-raw-insn %t/extreme3 | FileCheck %s --check-prefix=EXTREME3 -# EXTREME3: addi.d $t0, $zero, -1912 -# EXTREME3-NEXT: pcalau12i $t1, -419430 +# EXTREME3: pcalau12i $t1, -419430 +# EXTREME3-NEXT: addi.d $t0, $zero, -1912 # EXTREME3-NEXT: lu32i.d $t0, 209715 # EXTREME3-NEXT: lu52i.d $t0, $t0, 1092 @@ -123,8 +123,8 @@ ## %pc64_hi12 = 0x444 = 1092 # RUN: ld.lld %t/extreme.o --section-start=.rodata=0x444aaaaa34567111 --section-start=.text=0x0000000012345678 -o %t/extreme4 # RUN: llvm-objdump -d --no-show-raw-insn %t/extreme4 | FileCheck %s --check-prefix=EXTREME4 -# EXTREME4: addi.d $t0, $zero, 273 -# EXTREME4-NEXT: pcalau12i $t1, 139810 +# EXTREME4: pcalau12i $t1, 139810 +# EXTREME4-NEXT: addi.d $t0, $zero, 273 # EXTREME4-NEXT: lu32i.d $t0, -349526 # EXTREME4-NEXT: lu52i.d $t0, $t0, 1092 @@ -135,8 +135,8 @@ ## %pc64_hi12 = 0x444 = 1092 # RUN: ld.lld %t/extreme.o --section-start=.rodata=0x444aaaaa34567888 --section-start=.text=0x0000000012345678 -o %t/extreme5 # RUN: llvm-objdump -d --no-show-raw-insn %t/extreme5 | FileCheck %s --check-prefix=EXTREME5 -# EXTREME5: addi.d $t0, $zero, -1912 -# EXTREME5-NEXT: pcalau12i $t1, 139811 +# EXTREME5: pcalau12i $t1, 139811 +# EXTREME5-NEXT: addi.d $t0, $zero, -1912 # EXTREME5-NEXT: lu32i.d $t0, -349527 # EXTREME5-NEXT: lu52i.d $t0, $t0, 1092 @@ -147,8 +147,8 @@ ## %pc64_hi12 = 0x444 = 1092 # RUN: ld.lld %t/extreme.o --section-start=.rodata=0x444aaaaaabcde111 --section-start=.text=0x0000000012345678 -o %t/extreme6 # RUN: llvm-objdump -d --no-show-raw-insn %t/extreme6 | FileCheck %s --check-prefix=EXTREME6 -# EXTREME6: addi.d $t0, $zero, 273 -# EXTREME6-NEXT: pcalau12i $t1, -419431 +# EXTREME6: pcalau12i $t1, -419431 +# EXTREME6-NEXT: addi.d $t0, $zero, 273 # EXTREME6-NEXT: lu32i.d $t0, -349525 # EXTREME6-NEXT: lu52i.d $t0, $t0, 1092 @@ -159,8 +159,8 @@ ## %pc64_hi12 = 0x444 = 1092 # RUN: ld.lld %t/extreme.o --section-start=.rodata=0x444aaaaaabcde888 --section-start=.text=0x0000000012345678 -o %t/extreme7 # RUN: llvm-objdump -d --no-show-raw-insn %t/extreme7 | FileCheck %s --check-prefix=EXTREME7 -# EXTREME7: addi.d $t0, $zero, -1912 -# EXTREME7-NEXT: pcalau12i $t1, -419430 +# EXTREME7: pcalau12i $t1, -419430 +# EXTREME7-NEXT: addi.d $t0, $zero, -1912 # EXTREME7-NEXT: lu32i.d $t0, -349526 # EXTREME7-NEXT: lu52i.d $t0, $t0, 1092 @@ -171,8 +171,8 @@ ## %pc64_hi12 = 0xbbb = -1093 # RUN: ld.lld %t/extreme.o --section-start=.rodata=0xbbb3333334567111 --section-start=.text=0x0000000012345678 -o %t/extreme8 # RUN: llvm-objdump -d --no-show-raw-insn %t/extreme8 | FileCheck %s --check-prefix=EXTREME8 -# EXTREME8: addi.d $t0, $zero, 273 -# EXTREME8-NEXT: pcalau12i $t1, 139810 +# EXTREME8: pcalau12i $t1, 139810 +# EXTREME8-NEXT: addi.d $t0, $zero, 273 # EXTREME8-NEXT: lu32i.d $t0, 209715 # EXTREME8-NEXT: lu52i.d $t0, $t0, -1093 @@ -183,8 +183,8 @@ ## %pc64_hi12 = 0xbbb = -1093 # RUN: ld.lld %t/extreme.o --section-start=.rodata=0xbbb3333334567888 --section-start=.text=0x0000000012345678 -o 
%t/extreme9 # RUN: llvm-objdump -d --no-show-raw-insn %t/extreme9 | FileCheck %s --check-prefix=EXTREME9 -# EXTREME9: addi.d $t0, $zero, -1912 -# EXTREME9-NEXT: pcalau12i $t1, 139811 +# EXTREME9: pcalau12i $t1, 139811 +# EXTREME9-NEXT: addi.d $t0, $zero, -1912 # EXTREME9-NEXT: lu32i.d $t0, 209714 # EXTREME9-NEXT: lu52i.d $t0, $t0, -1093 @@ -195,8 +195,8 @@ ## %pc64_hi12 = 0xbbb = -1093 # RUN: ld.lld %t/extreme.o --section-start=.rodata=0xbbb33333abcde111 --section-start=.text=0x0000000012345678 -o %t/extreme10 # RUN: llvm-objdump -d --no-show-raw-insn %t/extreme10 | FileCheck %s --check-prefix=EXTREME10 -# EXTREME10: addi.d $t0, $zero, 273 -# EXTREME10-NEXT: pcalau12i $t1, -419431 +# EXTREME10: pcalau12i $t1, -419431 +# EXTREME10-NEXT: addi.d $t0, $zero, 273 # EXTREME10-NEXT: lu32i.d $t0, 209716 # EXTREME10-NEXT: lu52i.d $t0, $t0, -1093 @@ -207,8 +207,8 @@ ## %pc64_hi12 = 0xbbb = -1093 # RUN: ld.lld %t/extreme.o --section-start=.rodata=0xbbb33333abcde888 --section-start=.text=0x0000000012345678 -o %t/extreme11 # RUN: llvm-objdump -d --no-show-raw-insn %t/extreme11 | FileCheck %s --check-prefix=EXTREME11 -# EXTREME11: addi.d $t0, $zero, -1912 -# EXTREME11-NEXT: pcalau12i $t1, -419430 +# EXTREME11: pcalau12i $t1, -419430 +# EXTREME11-NEXT: addi.d $t0, $zero, -1912 # EXTREME11-NEXT: lu32i.d $t0, 209715 # EXTREME11-NEXT: lu52i.d $t0, $t0, -1093 @@ -219,8 +219,8 @@ ## %pc64_hi12 = 0xbbb = -1093 # RUN: ld.lld %t/extreme.o --section-start=.rodata=0xbbbaaaaa34567111 --section-start=.text=0x0000000012345678 -o %t/extreme12 # RUN: llvm-objdump -d --no-show-raw-insn %t/extreme12 | FileCheck %s --check-prefix=EXTREME12 -# EXTREME12: addi.d $t0, $zero, 273 -# EXTREME12-NEXT: pcalau12i $t1, 139810 +# EXTREME12: pcalau12i $t1, 139810 +# EXTREME12-NEXT: addi.d $t0, $zero, 273 # EXTREME12-NEXT: lu32i.d $t0, -349526 # EXTREME12-NEXT: lu52i.d $t0, $t0, -1093 @@ -231,8 +231,8 @@ ## %pc64_hi12 = 0xbbb = -1093 # RUN: ld.lld %t/extreme.o --section-start=.rodata=0xbbbaaaaa34567888 --section-start=.text=0x0000000012345678 -o %t/extreme13 # RUN: llvm-objdump -d --no-show-raw-insn %t/extreme13 | FileCheck %s --check-prefix=EXTREME13 -# EXTREME13: addi.d $t0, $zero, -1912 -# EXTREME13-NEXT: pcalau12i $t1, 139811 +# EXTREME13: pcalau12i $t1, 139811 +# EXTREME13-NEXT: addi.d $t0, $zero, -1912 # EXTREME13-NEXT: lu32i.d $t0, -349527 # EXTREME13-NEXT: lu52i.d $t0, $t0, -1093 @@ -243,8 +243,8 @@ ## %pc64_hi12 = 0xbbb = -1093 # RUN: ld.lld %t/extreme.o --section-start=.rodata=0xbbbaaaaaabcde111 --section-start=.text=0x0000000012345678 -o %t/extreme14 # RUN: llvm-objdump -d --no-show-raw-insn %t/extreme14 | FileCheck %s --check-prefix=EXTREME14 -# EXTREME14: addi.d $t0, $zero, 273 -# EXTREME14-NEXT: pcalau12i $t1, -419431 +# EXTREME14: pcalau12i $t1, -419431 +# EXTREME14-NEXT: addi.d $t0, $zero, 273 # EXTREME14-NEXT: lu32i.d $t0, -349525 # EXTREME14-NEXT: lu52i.d $t0, $t0, -1093 @@ -255,11 +255,48 @@ ## %pc64_hi12 = 0xbbb = -1093 # RUN: ld.lld %t/extreme.o --section-start=.rodata=0xbbbaaaaaabcde888 --section-start=.text=0x0000000012345678 -o %t/extreme15 # RUN: llvm-objdump -d --no-show-raw-insn %t/extreme15 | FileCheck %s --check-prefix=EXTREME15 -# EXTREME15: addi.d $t0, $zero, -1912 -# EXTREME15-NEXT: pcalau12i $t1, -419430 +# EXTREME15: pcalau12i $t1, -419430 +# EXTREME15-NEXT: addi.d $t0, $zero, -1912 # EXTREME15-NEXT: lu32i.d $t0, -349526 # EXTREME15-NEXT: lu52i.d $t0, $t0, -1093 +## page delta = 0xffffffff00000000, page offset = 0x888 +## %pc_lo12 = 0x888 = -1912 +## %pc_hi20 = 0x00000 = 0 +## %pc64_lo20 = 0xfffff = 
-1
+## %pc64_hi12 = 0xfff = -1
+# RUN: ld.lld %t/extreme.o --section-start=.rodata=0x0000000012344888 --section-start=.text=0x0000000012345678 -o %t/extreme16
+# RUN: llvm-objdump -d --no-show-raw-insn %t/extreme16 | FileCheck %s --check-prefix=EXTREME16
+# EXTREME16:      pcalau12i $t1, 0
+# EXTREME16-NEXT: addi.d $t0, $zero, -1912
+# EXTREME16-NEXT: lu32i.d $t0, -1
+# EXTREME16-NEXT: lu52i.d $t0, $t0, -1
+
+## page delta = 0x0000000080000000, page offset = 0x888
+## %pc_lo12 = 0x888 = -1912
+## %pc_hi20 = 0x80000 = -524288
+## %pc64_lo20 = 0x00000 = 0
+## %pc64_hi12 = 0x000 = 0
+# RUN: ld.lld %t/extreme.o --section-start=.rodata=0x000071238ffff888 --section-start=.text=0x0000712310000678 -o %t/extreme17
+# RUN: llvm-objdump -d --no-show-raw-insn %t/extreme17 | FileCheck %s --check-prefix=EXTREME17
+# EXTREME17:      pcalau12i $t1, -524288
+# EXTREME17-NEXT: addi.d $t0, $zero, -1912
+# EXTREME17-NEXT: lu32i.d $t0, 0
+# EXTREME17-NEXT: lu52i.d $t0, $t0, 0
+
+## A case where pcalau12i, lu32i.d and lu52i.d are in different pages.
+## page delta = 0x0000000080000000, page offset = 0x123
+## %pc_lo12 = 0x111 = 273
+## %pc_hi20 = 0x80000 = -524288
+## %pc64_lo20 = 0x00001 = 1
+## %pc64_hi12 = 0x000 = 0
+# RUN: ld.lld %t/extreme.o --section-start=.rodata=0x80000111 --section-start=.text=0xff8 -o %t/extreme18
+# RUN: llvm-objdump -d --no-show-raw-insn %t/extreme18 | FileCheck %s --check-prefix=EXTREME18
+# EXTREME18:      pcalau12i $t1, -524288
+# EXTREME18-NEXT: addi.d $t0, $zero, 273
+# EXTREME18-NEXT: lu32i.d $t0, 1
+# EXTREME18-NEXT: lu52i.d $t0, $t0, 0
+
 #--- a.s
 .rodata
 x:
@@ -277,7 +314,7 @@ x:
 .text
 .global _start
 _start:
-    addi.d $t0, $zero, %pc_lo12(x)
     pcalau12i $t1, %pc_hi20(x)
+    addi.d $t0, $zero, %pc_lo12(x)
     lu32i.d $t0, %pc64_lo20(x)
     lu52i.d $t0, $t0, %pc64_hi12(x)
diff --git a/llvm-build/toolchain_readme.md b/llvm-build/toolchain_readme.md
index de059bf7752f740cb6449e3da07f8f77591a4f78..1e278a560ce74773790daa54fc3fa842bb9dc226 100644
--- a/llvm-build/toolchain_readme.md
+++ b/llvm-build/toolchain_readme.md
@@ -18,6 +18,7 @@ Despite all the components provided by LLVM community, we included several tripl
 | arm-linux-ohos | ARM 32bits | Linux | Small system |
 | arm-linux-ohos | ARM 32bits | Linux | Standard system |
 | aarch64-linux-ohos | ARM 64bits | Linux | Standard system |
+| loongarch64-linux-ohos | LoongArch64 | Linux | Standard system |

 For detailed definition of Small System and Standard System, please refer to [System Types](https://gitee.com/openharmony/docs/blob/master/en/device-dev/Readme-EN.md).
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 8c80c05ee785cd21b31165978e27e09eb6edf02d..c31def0330aa65d6f54ca64a6cd43aef95147b80 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -722,6 +722,13 @@ information. Attaching section information to an external declaration is an
 assertion that its definition is located in the specified section. If the
 definition is located in a different section, the behavior is undefined.

+LLVM allows an explicit code model to be specified for globals. If the
+target supports it, it will emit globals in the code model specified,
+overriding the code model used to compile the translation unit.
+The allowed values are "tiny", "small", "kernel", "medium", "large".
+This may be extended in the future to specify global data layout that
+doesn't cleanly fit into a specific code model.
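
The C++ API backing this LangRef addition is declared in the GlobalVariable.h hunk further down in this patch. The following is a minimal sketch, assuming only the patch's setCodeModel/getCodeModel declarations; the module pass context and the global's name "g" are illustrative, not part of the patch:

    #include <cassert>
    #include "llvm/IR/Constants.h"
    #include "llvm/IR/GlobalVariable.h"
    #include "llvm/IR/Module.h"
    #include "llvm/Support/CodeGen.h"

    // Create an internal i32 global and pin it to the large code model;
    // it will print as: @g = internal global i32 0, code_model "large"
    static void tagGlobal(llvm::Module &M) {
      auto &Ctx = M.getContext();
      auto *Init = llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), 0);
      auto *GV = new llvm::GlobalVariable(M, Init->getType(),
                                          /*isConstant=*/false,
                                          llvm::GlobalValue::InternalLinkage,
                                          Init, "g");
      GV->setCodeModel(llvm::CodeModel::Large);
      if (auto CM = GV->getCodeModel())
        assert(*CM == llvm::CodeModel::Large);
    }
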
+ By default, global initializers are optimized by assuming that global variables defined within the module are not modified from their initial values before the start of the global initializer. This is @@ -770,6 +777,7 @@ Syntax:: [] [, section "name"] [, partition "name"] [, comdat [($name)]] [, align ] + [, code_model "model"] [, no_sanitize_address] [, no_sanitize_hwaddress] [, sanitize_address_dyninit] [, sanitize_memtag] (, !name !N)* @@ -787,6 +795,13 @@ The following example just declares a global variable @G = external global i32 +The following example defines a global variable with the +``large`` code model: + +.. code-block:: llvm + + @G = internal global i32 0, code_model "large" + The following example defines a thread-local global with the ``initialexec`` TLS model: diff --git a/llvm/docs/TableGen/ProgRef.rst b/llvm/docs/TableGen/ProgRef.rst index 0bafc92ae895307368e6b7feaed6d28bdc8b754f..ca6ad0b8ed8e9cf2b0fb1831e9d93f01e6c7a4f5 100644 --- a/llvm/docs/TableGen/ProgRef.rst +++ b/llvm/docs/TableGen/ProgRef.rst @@ -1626,6 +1626,12 @@ and non-0 as true. This operator produces 1 if the string, list, or DAG *a* is empty; 0 otherwise. A dag is empty if it has no arguments; the operator does not count. +``!tolower(``\ *a*\ ``)`` + This operator converts a string input *a* to lower case. + +``!toupper(``\ *a*\ ``)`` + This operator converts a string input *a* to upper case. + ``!eq(`` *a*\ `,` *b*\ ``)`` This operator produces 1 if *a* is equal to *b*; 0 otherwise. The arguments must be ``bit``, ``bits``, ``int``, ``string``, or diff --git a/llvm/include/llvm/AsmParser/LLParser.h b/llvm/include/llvm/AsmParser/LLParser.h index 3389475b2c9aac1662cf9ed3c22b21e784fdce2b..6e583e24a52a9f0dd3e160c9993ddec3704da3ad 100644 --- a/llvm/include/llvm/AsmParser/LLParser.h +++ b/llvm/include/llvm/AsmParser/LLParser.h @@ -272,6 +272,7 @@ namespace llvm { bool parseOptionalCallingConv(unsigned &CC); bool parseOptionalAlignment(MaybeAlign &Alignment, bool AllowParens = false); + bool parseOptionalCodeModel(CodeModel::Model &model); bool parseOptionalDerefAttrBytes(lltok::Kind AttrKind, uint64_t &Bytes); bool parseOptionalUWTableKind(UWTableKind &Kind); bool parseAllocKind(AllocFnKind &Kind); diff --git a/llvm/include/llvm/AsmParser/LLToken.h b/llvm/include/llvm/AsmParser/LLToken.h index fe0e3f3a0460005dba4807c25e74aeec94cd57df..a3803fa835dd65fcfc14c4710b0ea5fc50054aba 100644 --- a/llvm/include/llvm/AsmParser/LLToken.h +++ b/llvm/include/llvm/AsmParser/LLToken.h @@ -114,6 +114,7 @@ enum Kind { kw_addrspace, kw_section, kw_partition, + kw_code_model, kw_alias, kw_ifunc, kw_module, diff --git a/llvm/include/llvm/BinaryFormat/ELFRelocs/LoongArch.def b/llvm/include/llvm/BinaryFormat/ELFRelocs/LoongArch.def index 02bce3c71712743cb2951b0df9a6c304dcd54c32..c4393432677b8e3e818460a4e2f42ec4abc55b4d 100644 --- a/llvm/include/llvm/BinaryFormat/ELFRelocs/LoongArch.def +++ b/llvm/include/llvm/BinaryFormat/ELFRelocs/LoongArch.def @@ -118,3 +118,9 @@ ELF_RELOC(R_LARCH_SUB6, 106) ELF_RELOC(R_LARCH_ADD_ULEB128, 107) ELF_RELOC(R_LARCH_SUB_ULEB128, 108) ELF_RELOC(R_LARCH_64_PCREL, 109) + +// Relocs added in ELF for the LoongArchâ„¢ Architecture v20231102, part of the +// v2.20 LoongArch ABI specs. 
+// +// Spec addition: https://github.com/loongson/la-abi-specs/pull/4 +ELF_RELOC(R_LARCH_CALL36, 110) diff --git a/llvm/include/llvm/IR/GlobalObject.h b/llvm/include/llvm/IR/GlobalObject.h index 96a27031668678b7c859120296724f7f69ed5010..170eeb46bf12f86cb0e387984b7a53b2614917a1 100644 --- a/llvm/include/llvm/IR/GlobalObject.h +++ b/llvm/include/llvm/IR/GlobalObject.h @@ -51,6 +51,7 @@ protected: Comdat *ObjComdat = nullptr; enum { LastAlignmentBit = 5, + LastCodeModelBit = 8, HasSectionHashEntryBit, GlobalObjectBits, diff --git a/llvm/include/llvm/IR/GlobalVariable.h b/llvm/include/llvm/IR/GlobalVariable.h index e772964fcc6b5ab36b21cb375bde6bd6124fe8c4..4ec7973450a301aa4aee49e94580ac8ed8a8694d 100644 --- a/llvm/include/llvm/IR/GlobalVariable.h +++ b/llvm/include/llvm/IR/GlobalVariable.h @@ -46,6 +46,11 @@ class GlobalVariable : public GlobalObject, public ilist_node { // value before global // initializers are run? +private: + static const unsigned CodeModelBits = LastCodeModelBit - LastAlignmentBit; + static const unsigned CodeModelMask = (1 << CodeModelBits) - 1; + static const unsigned CodeModelShift = LastAlignmentBit + 1; + public: /// GlobalVariable ctor - If a parent module is specified, the global is /// automatically inserted into the end of the specified modules global list. @@ -246,6 +251,28 @@ public: getAttributes().hasAttribute("rodata-section"); } + /// Get the custom code model raw value of this global. + /// + unsigned getCodeModelRaw() const { + unsigned Data = getGlobalValueSubClassData(); + return (Data >> CodeModelShift) & CodeModelMask; + } + + /// Get the custom code model of this global if it has one. + /// + /// If this global does not have a custom code model, the empty instance + /// will be returned. + Optional getCodeModel() const { + unsigned CodeModelData = getCodeModelRaw(); + if (CodeModelData > 0) + return static_cast(CodeModelData - 1); + return {}; + } + + /// Change the code model for this global. 
+ /// + void setCodeModel(CodeModel::Model CM); + // Methods for support type inquiry through isa, cast, and dyn_cast: static bool classof(const Value *V) { return V->getValueID() == Value::GlobalVariableVal; diff --git a/llvm/include/llvm/IR/IntrinsicsLoongArch.td b/llvm/include/llvm/IR/IntrinsicsLoongArch.td index 5edce3c529e1217514b67f6ef3f1d4a9d8a8fc61..685deaec7709bd7896a65fedcd121b6a3d410a45 100644 --- a/llvm/include/llvm/IR/IntrinsicsLoongArch.td +++ b/llvm/include/llvm/IR/IntrinsicsLoongArch.td @@ -51,74 +51,1122 @@ defm int_loongarch_masked_cmpxchg : MaskedAtomicRMWFiveOpIntrinsics; //===----------------------------------------------------------------------===// // LoongArch BASE -def int_loongarch_break : Intrinsic<[], [llvm_i32_ty], [ImmArg>]>; -def int_loongarch_cacop_d : Intrinsic<[], [llvm_i64_ty, llvm_i64_ty, llvm_i64_ty], - [ImmArg>, ImmArg>]>; -def int_loongarch_cacop_w : Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], - [ImmArg>, ImmArg>]>; -def int_loongarch_dbar : Intrinsic<[], [llvm_i32_ty], [ImmArg>]>; -def int_loongarch_ibar : Intrinsic<[], [llvm_i32_ty], [ImmArg>]>; -def int_loongarch_movfcsr2gr : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], - [ImmArg>]>; -def int_loongarch_movgr2fcsr : Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], - [ImmArg>]>; -def int_loongarch_syscall : Intrinsic<[], [llvm_i32_ty], [ImmArg>]>; - -def int_loongarch_crc_w_b_w : Intrinsic<[llvm_i32_ty], - [llvm_i32_ty, llvm_i32_ty]>; -def int_loongarch_crc_w_h_w : Intrinsic<[llvm_i32_ty], - [llvm_i32_ty, llvm_i32_ty]>; -def int_loongarch_crc_w_w_w : Intrinsic<[llvm_i32_ty], - [llvm_i32_ty, llvm_i32_ty]>; -def int_loongarch_crc_w_d_w : Intrinsic<[llvm_i32_ty], - [llvm_i64_ty, llvm_i32_ty]>; - -def int_loongarch_crcc_w_b_w : Intrinsic<[llvm_i32_ty], - [llvm_i32_ty, llvm_i32_ty]>; -def int_loongarch_crcc_w_h_w : Intrinsic<[llvm_i32_ty], - [llvm_i32_ty, llvm_i32_ty]>; -def int_loongarch_crcc_w_w_w : Intrinsic<[llvm_i32_ty], - [llvm_i32_ty, llvm_i32_ty]>; -def int_loongarch_crcc_w_d_w : Intrinsic<[llvm_i32_ty], - [llvm_i64_ty, llvm_i32_ty]>; - -def int_loongarch_csrrd_w : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], - [ImmArg>]>; -def int_loongarch_csrrd_d : Intrinsic<[llvm_i64_ty], [llvm_i32_ty], - [ImmArg>]>; -def int_loongarch_csrwr_w : Intrinsic<[llvm_i32_ty], - [llvm_i32_ty, llvm_i32_ty], - [ImmArg>]>; -def int_loongarch_csrwr_d : Intrinsic<[llvm_i64_ty], - [llvm_i64_ty, llvm_i32_ty], - [ImmArg>]>; -def int_loongarch_csrxchg_w : Intrinsic<[llvm_i32_ty], - [llvm_i32_ty, llvm_i32_ty, - llvm_i32_ty], - [ImmArg>]>; -def int_loongarch_csrxchg_d : Intrinsic<[llvm_i64_ty], - [llvm_i64_ty, llvm_i64_ty, - llvm_i32_ty], - [ImmArg>]>; - -def int_loongarch_iocsrrd_b : Intrinsic<[llvm_i32_ty], [llvm_i32_ty]>; -def int_loongarch_iocsrrd_h : Intrinsic<[llvm_i32_ty], [llvm_i32_ty]>; -def int_loongarch_iocsrrd_w : Intrinsic<[llvm_i32_ty], [llvm_i32_ty]>; -def int_loongarch_iocsrrd_d : Intrinsic<[llvm_i64_ty], [llvm_i32_ty]>; - -def int_loongarch_iocsrwr_b : Intrinsic<[], [llvm_i32_ty, llvm_i32_ty]>; -def int_loongarch_iocsrwr_h : Intrinsic<[], [llvm_i32_ty, llvm_i32_ty]>; -def int_loongarch_iocsrwr_w : Intrinsic<[], [llvm_i32_ty, llvm_i32_ty]>; -def int_loongarch_iocsrwr_d : Intrinsic<[], [llvm_i64_ty, llvm_i32_ty]>; - -def int_loongarch_cpucfg : Intrinsic<[llvm_i32_ty], [llvm_i32_ty]>; - -def int_loongarch_asrtle_d : Intrinsic<[], [llvm_i64_ty, llvm_i64_ty]>; -def int_loongarch_asrtgt_d : Intrinsic<[], [llvm_i64_ty, llvm_i64_ty]>; - -def int_loongarch_lddir_d : Intrinsic<[llvm_i64_ty], - [llvm_i64_ty, llvm_i64_ty], - 
[ImmArg>]>; -def int_loongarch_ldpte_d : Intrinsic<[], [llvm_i64_ty, llvm_i64_ty], - [ImmArg>]>; +class BaseInt ret_types, list param_types, + list intr_properties = []> + : Intrinsic, + ClangBuiltin; + +def int_loongarch_break : BaseInt<[], [llvm_i32_ty], [ImmArg>]>; +def int_loongarch_cacop_d : BaseInt<[], [llvm_i64_ty, llvm_i64_ty, llvm_i64_ty], + [ImmArg>, ImmArg>]>; +def int_loongarch_cacop_w : BaseInt<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [ImmArg>, ImmArg>]>; +def int_loongarch_dbar : BaseInt<[], [llvm_i32_ty], [ImmArg>]>; + +def int_loongarch_ibar : BaseInt<[], [llvm_i32_ty], [ImmArg>]>; +def int_loongarch_movfcsr2gr : BaseInt<[llvm_i32_ty], [llvm_i32_ty], + [ImmArg>]>; +def int_loongarch_movgr2fcsr : BaseInt<[], [llvm_i32_ty, llvm_i32_ty], + [ImmArg>]>; +def int_loongarch_syscall : BaseInt<[], [llvm_i32_ty], [ImmArg>]>; + +def int_loongarch_crc_w_b_w : BaseInt<[llvm_i32_ty], + [llvm_i32_ty, llvm_i32_ty]>; +def int_loongarch_crc_w_h_w : BaseInt<[llvm_i32_ty], + [llvm_i32_ty, llvm_i32_ty]>; +def int_loongarch_crc_w_w_w : BaseInt<[llvm_i32_ty], + [llvm_i32_ty, llvm_i32_ty]>; +def int_loongarch_crc_w_d_w : BaseInt<[llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty]>; + +def int_loongarch_crcc_w_b_w : BaseInt<[llvm_i32_ty], + [llvm_i32_ty, llvm_i32_ty]>; +def int_loongarch_crcc_w_h_w : BaseInt<[llvm_i32_ty], + [llvm_i32_ty, llvm_i32_ty]>; +def int_loongarch_crcc_w_w_w : BaseInt<[llvm_i32_ty], + [llvm_i32_ty, llvm_i32_ty]>; +def int_loongarch_crcc_w_d_w : BaseInt<[llvm_i32_ty], + [llvm_i64_ty, llvm_i32_ty]>; + +def int_loongarch_csrrd_w : BaseInt<[llvm_i32_ty], [llvm_i32_ty], + [ImmArg>]>; +def int_loongarch_csrrd_d : BaseInt<[llvm_i64_ty], [llvm_i32_ty], + [ImmArg>]>; +def int_loongarch_csrwr_w : BaseInt<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], + [ImmArg>]>; +def int_loongarch_csrwr_d : BaseInt<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty], + [ImmArg>]>; +def int_loongarch_csrxchg_w : BaseInt<[llvm_i32_ty], + [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [ImmArg>]>; +def int_loongarch_csrxchg_d : BaseInt<[llvm_i64_ty], + [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty], + [ImmArg>]>; + +def int_loongarch_iocsrrd_b : BaseInt<[llvm_i32_ty], [llvm_i32_ty]>; +def int_loongarch_iocsrrd_h : BaseInt<[llvm_i32_ty], [llvm_i32_ty]>; +def int_loongarch_iocsrrd_w : BaseInt<[llvm_i32_ty], [llvm_i32_ty]>; +def int_loongarch_iocsrrd_d : BaseInt<[llvm_i64_ty], [llvm_i32_ty]>; + +def int_loongarch_iocsrwr_b : BaseInt<[], [llvm_i32_ty, llvm_i32_ty]>; +def int_loongarch_iocsrwr_h : BaseInt<[], [llvm_i32_ty, llvm_i32_ty]>; +def int_loongarch_iocsrwr_w : BaseInt<[], [llvm_i32_ty, llvm_i32_ty]>; +def int_loongarch_iocsrwr_d : BaseInt<[], [llvm_i64_ty, llvm_i32_ty]>; + +def int_loongarch_cpucfg : BaseInt<[llvm_i32_ty], [llvm_i32_ty]>; + +def int_loongarch_asrtle_d : BaseInt<[], [llvm_i64_ty, llvm_i64_ty]>; +def int_loongarch_asrtgt_d : BaseInt<[], [llvm_i64_ty, llvm_i64_ty]>; + +def int_loongarch_lddir_d : BaseInt<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], + [ImmArg>]>; +def int_loongarch_ldpte_d : BaseInt<[], [llvm_i64_ty, llvm_i64_ty], + [ImmArg>]>; +} // TargetPrefix = "loongarch" + +/// Vector intrinsic + +class VecInt ret_types, list param_types, + list intr_properties = []> + : Intrinsic, + ClangBuiltin; + +//===----------------------------------------------------------------------===// +// LSX + +let TargetPrefix = "loongarch" in { + +foreach inst = ["vadd_b", "vsub_b", + "vsadd_b", "vsadd_bu", "vssub_b", "vssub_bu", + "vavg_b", "vavg_bu", "vavgr_b", "vavgr_bu", + "vabsd_b", "vabsd_bu", "vadda_b", + "vmax_b", 
"vmax_bu", "vmin_b", "vmin_bu", + "vmul_b", "vmuh_b", "vmuh_bu", + "vdiv_b", "vdiv_bu", "vmod_b", "vmod_bu", "vsigncov_b", + "vand_v", "vor_v", "vxor_v", "vnor_v", "vandn_v", "vorn_v", + "vsll_b", "vsrl_b", "vsra_b", "vrotr_b", "vsrlr_b", "vsrar_b", + "vbitclr_b", "vbitset_b", "vbitrev_b", + "vseq_b", "vsle_b", "vsle_bu", "vslt_b", "vslt_bu", + "vpackev_b", "vpackod_b", "vpickev_b", "vpickod_b", + "vilvl_b", "vilvh_b"] in + def int_loongarch_lsx_#inst : VecInt<[llvm_v16i8_ty], + [llvm_v16i8_ty, llvm_v16i8_ty], + [IntrNoMem]>; + +foreach inst = ["vadd_h", "vsub_h", + "vsadd_h", "vsadd_hu", "vssub_h", "vssub_hu", + "vavg_h", "vavg_hu", "vavgr_h", "vavgr_hu", + "vabsd_h", "vabsd_hu", "vadda_h", + "vmax_h", "vmax_hu", "vmin_h", "vmin_hu", + "vmul_h", "vmuh_h", "vmuh_hu", + "vdiv_h", "vdiv_hu", "vmod_h", "vmod_hu", "vsigncov_h", + "vsll_h", "vsrl_h", "vsra_h", "vrotr_h", "vsrlr_h", "vsrar_h", + "vbitclr_h", "vbitset_h", "vbitrev_h", + "vseq_h", "vsle_h", "vsle_hu", "vslt_h", "vslt_hu", + "vpackev_h", "vpackod_h", "vpickev_h", "vpickod_h", + "vilvl_h", "vilvh_h"] in + def int_loongarch_lsx_#inst : VecInt<[llvm_v8i16_ty], + [llvm_v8i16_ty, llvm_v8i16_ty], + [IntrNoMem]>; + +foreach inst = ["vadd_w", "vsub_w", + "vsadd_w", "vsadd_wu", "vssub_w", "vssub_wu", + "vavg_w", "vavg_wu", "vavgr_w", "vavgr_wu", + "vabsd_w", "vabsd_wu", "vadda_w", + "vmax_w", "vmax_wu", "vmin_w", "vmin_wu", + "vmul_w", "vmuh_w", "vmuh_wu", + "vdiv_w", "vdiv_wu", "vmod_w", "vmod_wu", "vsigncov_w", + "vsll_w", "vsrl_w", "vsra_w", "vrotr_w", "vsrlr_w", "vsrar_w", + "vbitclr_w", "vbitset_w", "vbitrev_w", + "vseq_w", "vsle_w", "vsle_wu", "vslt_w", "vslt_wu", + "vpackev_w", "vpackod_w", "vpickev_w", "vpickod_w", + "vilvl_w", "vilvh_w"] in + def int_loongarch_lsx_#inst : VecInt<[llvm_v4i32_ty], + [llvm_v4i32_ty, llvm_v4i32_ty], + [IntrNoMem]>; + +foreach inst = ["vadd_d", "vadd_q", "vsub_d", "vsub_q", + "vsadd_d", "vsadd_du", "vssub_d", "vssub_du", + "vhaddw_q_d", "vhaddw_qu_du", "vhsubw_q_d", "vhsubw_qu_du", + "vaddwev_q_d", "vaddwod_q_d", "vsubwev_q_d", "vsubwod_q_d", + "vaddwev_q_du", "vaddwod_q_du", "vsubwev_q_du", "vsubwod_q_du", + "vaddwev_q_du_d", "vaddwod_q_du_d", + "vavg_d", "vavg_du", "vavgr_d", "vavgr_du", + "vabsd_d", "vabsd_du", "vadda_d", + "vmax_d", "vmax_du", "vmin_d", "vmin_du", + "vmul_d", "vmuh_d", "vmuh_du", + "vmulwev_q_d", "vmulwod_q_d", "vmulwev_q_du", "vmulwod_q_du", + "vmulwev_q_du_d", "vmulwod_q_du_d", + "vdiv_d", "vdiv_du", "vmod_d", "vmod_du", "vsigncov_d", + "vsll_d", "vsrl_d", "vsra_d", "vrotr_d", "vsrlr_d", "vsrar_d", + "vbitclr_d", "vbitset_d", "vbitrev_d", + "vseq_d", "vsle_d", "vsle_du", "vslt_d", "vslt_du", + "vpackev_d", "vpackod_d", "vpickev_d", "vpickod_d", + "vilvl_d", "vilvh_d"] in + def int_loongarch_lsx_#inst : VecInt<[llvm_v2i64_ty], + [llvm_v2i64_ty, llvm_v2i64_ty], + [IntrNoMem]>; + +foreach inst = ["vaddi_bu", "vsubi_bu", + "vmaxi_b", "vmaxi_bu", "vmini_b", "vmini_bu", + "vsat_b", "vsat_bu", + "vandi_b", "vori_b", "vxori_b", "vnori_b", + "vslli_b", "vsrli_b", "vsrai_b", "vrotri_b", + "vsrlri_b", "vsrari_b", + "vbitclri_b", "vbitseti_b", "vbitrevi_b", + "vseqi_b", "vslei_b", "vslei_bu", "vslti_b", "vslti_bu", + "vreplvei_b", "vbsll_v", "vbsrl_v", "vshuf4i_b"] in + def int_loongarch_lsx_#inst : VecInt<[llvm_v16i8_ty], + [llvm_v16i8_ty, llvm_i32_ty], + [IntrNoMem, ImmArg>]>; +foreach inst = ["vaddi_hu", "vsubi_hu", + "vmaxi_h", "vmaxi_hu", "vmini_h", "vmini_hu", + "vsat_h", "vsat_hu", + "vslli_h", "vsrli_h", "vsrai_h", "vrotri_h", + "vsrlri_h", "vsrari_h", + "vbitclri_h", "vbitseti_h", 
"vbitrevi_h", + "vseqi_h", "vslei_h", "vslei_hu", "vslti_h", "vslti_hu", + "vreplvei_h", "vshuf4i_h"] in + def int_loongarch_lsx_#inst : VecInt<[llvm_v8i16_ty], + [llvm_v8i16_ty, llvm_i32_ty], + [IntrNoMem, ImmArg>]>; +foreach inst = ["vaddi_wu", "vsubi_wu", + "vmaxi_w", "vmaxi_wu", "vmini_w", "vmini_wu", + "vsat_w", "vsat_wu", + "vslli_w", "vsrli_w", "vsrai_w", "vrotri_w", + "vsrlri_w", "vsrari_w", + "vbitclri_w", "vbitseti_w", "vbitrevi_w", + "vseqi_w", "vslei_w", "vslei_wu", "vslti_w", "vslti_wu", + "vreplvei_w", "vshuf4i_w"] in + def int_loongarch_lsx_#inst : VecInt<[llvm_v4i32_ty], + [llvm_v4i32_ty, llvm_i32_ty], + [IntrNoMem, ImmArg>]>; +foreach inst = ["vaddi_du", "vsubi_du", + "vmaxi_d", "vmaxi_du", "vmini_d", "vmini_du", + "vsat_d", "vsat_du", + "vslli_d", "vsrli_d", "vsrai_d", "vrotri_d", + "vsrlri_d", "vsrari_d", + "vbitclri_d", "vbitseti_d", "vbitrevi_d", + "vseqi_d", "vslei_d", "vslei_du", "vslti_d", "vslti_du", + "vreplvei_d"] in + def int_loongarch_lsx_#inst : VecInt<[llvm_v2i64_ty], + [llvm_v2i64_ty, llvm_i32_ty], + [IntrNoMem, ImmArg>]>; + +foreach inst = ["vhaddw_h_b", "vhaddw_hu_bu", "vhsubw_h_b", "vhsubw_hu_bu", + "vaddwev_h_b", "vaddwod_h_b", "vsubwev_h_b", "vsubwod_h_b", + "vaddwev_h_bu", "vaddwod_h_bu", "vsubwev_h_bu", "vsubwod_h_bu", + "vaddwev_h_bu_b", "vaddwod_h_bu_b", + "vmulwev_h_b", "vmulwod_h_b", "vmulwev_h_bu", "vmulwod_h_bu", + "vmulwev_h_bu_b", "vmulwod_h_bu_b"] in + def int_loongarch_lsx_#inst : VecInt<[llvm_v8i16_ty], + [llvm_v16i8_ty, llvm_v16i8_ty], + [IntrNoMem]>; + +foreach inst = ["vhaddw_w_h", "vhaddw_wu_hu", "vhsubw_w_h", "vhsubw_wu_hu", + "vaddwev_w_h", "vaddwod_w_h", "vsubwev_w_h", "vsubwod_w_h", + "vaddwev_w_hu", "vaddwod_w_hu", "vsubwev_w_hu", "vsubwod_w_hu", + "vaddwev_w_hu_h", "vaddwod_w_hu_h", + "vmulwev_w_h", "vmulwod_w_h", "vmulwev_w_hu", "vmulwod_w_hu", + "vmulwev_w_hu_h", "vmulwod_w_hu_h"] in + def int_loongarch_lsx_#inst : VecInt<[llvm_v4i32_ty], + [llvm_v8i16_ty, llvm_v8i16_ty], + [IntrNoMem]>; + +foreach inst = ["vhaddw_d_w", "vhaddw_du_wu", "vhsubw_d_w", "vhsubw_du_wu", + "vaddwev_d_w", "vaddwod_d_w", "vsubwev_d_w", "vsubwod_d_w", + "vaddwev_d_wu", "vaddwod_d_wu", "vsubwev_d_wu", "vsubwod_d_wu", + "vaddwev_d_wu_w", "vaddwod_d_wu_w", + "vmulwev_d_w", "vmulwod_d_w", "vmulwev_d_wu", "vmulwod_d_wu", + "vmulwev_d_wu_w", "vmulwod_d_wu_w"] in + def int_loongarch_lsx_#inst : VecInt<[llvm_v2i64_ty], + [llvm_v4i32_ty, llvm_v4i32_ty], + [IntrNoMem]>; + +foreach inst = ["vsrln_b_h", "vsran_b_h", "vsrlrn_b_h", "vsrarn_b_h", + "vssrln_b_h", "vssran_b_h", "vssrln_bu_h", "vssran_bu_h", + "vssrlrn_b_h", "vssrarn_b_h", "vssrlrn_bu_h", "vssrarn_bu_h"] in + def int_loongarch_lsx_#inst : VecInt<[llvm_v16i8_ty], + [llvm_v8i16_ty, llvm_v8i16_ty], + [IntrNoMem]>; + +foreach inst = ["vsrln_h_w", "vsran_h_w", "vsrlrn_h_w", "vsrarn_h_w", + "vssrln_h_w", "vssran_h_w", "vssrln_hu_w", "vssran_hu_w", + "vssrlrn_h_w", "vssrarn_h_w", "vssrlrn_hu_w", "vssrarn_hu_w"] in + def int_loongarch_lsx_#inst : VecInt<[llvm_v8i16_ty], + [llvm_v4i32_ty, llvm_v4i32_ty], + [IntrNoMem]>; + +foreach inst = ["vsrln_w_d", "vsran_w_d", "vsrlrn_w_d", "vsrarn_w_d", + "vssrln_w_d", "vssran_w_d", "vssrln_wu_d", "vssran_wu_d", + "vssrlrn_w_d", "vssrarn_w_d", "vssrlrn_wu_d", "vssrarn_wu_d"] in + def int_loongarch_lsx_#inst : VecInt<[llvm_v4i32_ty], + [llvm_v2i64_ty, llvm_v2i64_ty], + [IntrNoMem]>; + +foreach inst = ["vmadd_b", "vmsub_b", "vfrstp_b", "vbitsel_v", "vshuf_b"] in + def int_loongarch_lsx_#inst + : VecInt<[llvm_v16i8_ty], + [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty], + 
[IntrNoMem]>; +foreach inst = ["vmadd_h", "vmsub_h", "vfrstp_h", "vshuf_h"] in + def int_loongarch_lsx_#inst + : VecInt<[llvm_v8i16_ty], + [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty], + [IntrNoMem]>; +foreach inst = ["vmadd_w", "vmsub_w", "vshuf_w"] in + def int_loongarch_lsx_#inst + : VecInt<[llvm_v4i32_ty], + [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty], + [IntrNoMem]>; +foreach inst = ["vmadd_d", "vmsub_d", "vshuf_d"] in + def int_loongarch_lsx_#inst + : VecInt<[llvm_v2i64_ty], + [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], + [IntrNoMem]>; + +foreach inst = ["vsrlni_b_h", "vsrani_b_h", "vsrlrni_b_h", "vsrarni_b_h", + "vssrlni_b_h", "vssrani_b_h", "vssrlni_bu_h", "vssrani_bu_h", + "vssrlrni_b_h", "vssrarni_b_h", "vssrlrni_bu_h", "vssrarni_bu_h", + "vfrstpi_b", "vbitseli_b", "vextrins_b"] in + def int_loongarch_lsx_#inst + : VecInt<[llvm_v16i8_ty], + [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], + [IntrNoMem, ImmArg>]>; +foreach inst = ["vsrlni_h_w", "vsrani_h_w", "vsrlrni_h_w", "vsrarni_h_w", + "vssrlni_h_w", "vssrani_h_w", "vssrlni_hu_w", "vssrani_hu_w", + "vssrlrni_h_w", "vssrarni_h_w", "vssrlrni_hu_w", "vssrarni_hu_w", + "vfrstpi_h", "vextrins_h"] in + def int_loongarch_lsx_#inst + : VecInt<[llvm_v8i16_ty], + [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty], + [IntrNoMem, ImmArg>]>; +foreach inst = ["vsrlni_w_d", "vsrani_w_d", "vsrlrni_w_d", "vsrarni_w_d", + "vssrlni_w_d", "vssrani_w_d", "vssrlni_wu_d", "vssrani_wu_d", + "vssrlrni_w_d", "vssrarni_w_d", "vssrlrni_wu_d", "vssrarni_wu_d", + "vpermi_w", "vextrins_w"] in + def int_loongarch_lsx_#inst + : VecInt<[llvm_v4i32_ty], + [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], + [IntrNoMem, ImmArg>]>; +foreach inst = ["vsrlni_d_q", "vsrani_d_q", "vsrlrni_d_q", "vsrarni_d_q", + "vssrlni_d_q", "vssrani_d_q", "vssrlni_du_q", "vssrani_du_q", + "vssrlrni_d_q", "vssrarni_d_q", "vssrlrni_du_q", "vssrarni_du_q", + "vshuf4i_d", "vextrins_d"] in + def int_loongarch_lsx_#inst + : VecInt<[llvm_v2i64_ty], + [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], + [IntrNoMem, ImmArg>]>; + +foreach inst = ["vmaddwev_h_b", "vmaddwod_h_b", "vmaddwev_h_bu", + "vmaddwod_h_bu", "vmaddwev_h_bu_b", "vmaddwod_h_bu_b"] in + def int_loongarch_lsx_#inst + : VecInt<[llvm_v8i16_ty], + [llvm_v8i16_ty, llvm_v16i8_ty, llvm_v16i8_ty], + [IntrNoMem]>; +foreach inst = ["vmaddwev_w_h", "vmaddwod_w_h", "vmaddwev_w_hu", + "vmaddwod_w_hu", "vmaddwev_w_hu_h", "vmaddwod_w_hu_h"] in + def int_loongarch_lsx_#inst + : VecInt<[llvm_v4i32_ty], + [llvm_v4i32_ty, llvm_v8i16_ty, llvm_v8i16_ty], + [IntrNoMem]>; +foreach inst = ["vmaddwev_d_w", "vmaddwod_d_w", "vmaddwev_d_wu", + "vmaddwod_d_wu", "vmaddwev_d_wu_w", "vmaddwod_d_wu_w"] in + def int_loongarch_lsx_#inst + : VecInt<[llvm_v2i64_ty], + [llvm_v2i64_ty, llvm_v4i32_ty, llvm_v4i32_ty], + [IntrNoMem]>; +foreach inst = ["vmaddwev_q_d", "vmaddwod_q_d", "vmaddwev_q_du", + "vmaddwod_q_du", "vmaddwev_q_du_d", "vmaddwod_q_du_d"] in + def int_loongarch_lsx_#inst + : VecInt<[llvm_v2i64_ty], + [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty], + [IntrNoMem]>; + +foreach inst = ["vsllwil_h_b", "vsllwil_hu_bu"] in + def int_loongarch_lsx_#inst : VecInt<[llvm_v8i16_ty], + [llvm_v16i8_ty, llvm_i32_ty], + [IntrNoMem, ImmArg>]>; +foreach inst = ["vsllwil_w_h", "vsllwil_wu_hu"] in + def int_loongarch_lsx_#inst : VecInt<[llvm_v4i32_ty], + [llvm_v8i16_ty, llvm_i32_ty], + [IntrNoMem, ImmArg>]>; +foreach inst = ["vsllwil_d_w", "vsllwil_du_wu"] in + def int_loongarch_lsx_#inst : VecInt<[llvm_v2i64_ty], + [llvm_v4i32_ty, llvm_i32_ty], + [IntrNoMem, ImmArg>]>; + +foreach 
inst = ["vneg_b", "vmskltz_b", "vmskgez_b", "vmsknz_b", + "vclo_b", "vclz_b", "vpcnt_b"] in + def int_loongarch_lsx_#inst : VecInt<[llvm_v16i8_ty], [llvm_v16i8_ty], + [IntrNoMem]>; +foreach inst = ["vneg_h", "vmskltz_h", "vclo_h", "vclz_h", "vpcnt_h"] in + def int_loongarch_lsx_#inst : VecInt<[llvm_v8i16_ty], [llvm_v8i16_ty], + [IntrNoMem]>; +foreach inst = ["vneg_w", "vmskltz_w", "vclo_w", "vclz_w", "vpcnt_w"] in + def int_loongarch_lsx_#inst : VecInt<[llvm_v4i32_ty], [llvm_v4i32_ty], + [IntrNoMem]>; +foreach inst = ["vneg_d", "vexth_q_d", "vexth_qu_du", "vmskltz_d", + "vextl_q_d", "vextl_qu_du", "vclo_d", "vclz_d", "vpcnt_d"] in + def int_loongarch_lsx_#inst : VecInt<[llvm_v2i64_ty], [llvm_v2i64_ty], + [IntrNoMem]>; + +foreach inst = ["vexth_h_b", "vexth_hu_bu"] in + def int_loongarch_lsx_#inst : VecInt<[llvm_v8i16_ty], [llvm_v16i8_ty], + [IntrNoMem]>; +foreach inst = ["vexth_w_h", "vexth_wu_hu"] in + def int_loongarch_lsx_#inst : VecInt<[llvm_v4i32_ty], [llvm_v8i16_ty], + [IntrNoMem]>; +foreach inst = ["vexth_d_w", "vexth_du_wu"] in + def int_loongarch_lsx_#inst : VecInt<[llvm_v2i64_ty], [llvm_v4i32_ty], + [IntrNoMem]>; + +def int_loongarch_lsx_vldi : VecInt<[llvm_v2i64_ty], [llvm_i32_ty], + [IntrNoMem, ImmArg>]>; +def int_loongarch_lsx_vrepli_b : VecInt<[llvm_v16i8_ty], [llvm_i32_ty], + [IntrNoMem, ImmArg>]>; +def int_loongarch_lsx_vrepli_h : VecInt<[llvm_v8i16_ty], [llvm_i32_ty], + [IntrNoMem, ImmArg>]>; +def int_loongarch_lsx_vrepli_w : VecInt<[llvm_v4i32_ty], [llvm_i32_ty], + [IntrNoMem, ImmArg>]>; +def int_loongarch_lsx_vrepli_d : VecInt<[llvm_v2i64_ty], [llvm_i32_ty], + [IntrNoMem, ImmArg>]>; + +def int_loongarch_lsx_vreplgr2vr_b : VecInt<[llvm_v16i8_ty], [llvm_i32_ty], + [IntrNoMem]>; +def int_loongarch_lsx_vreplgr2vr_h : VecInt<[llvm_v8i16_ty], [llvm_i32_ty], + [IntrNoMem]>; +def int_loongarch_lsx_vreplgr2vr_w : VecInt<[llvm_v4i32_ty], [llvm_i32_ty], + [IntrNoMem]>; +def int_loongarch_lsx_vreplgr2vr_d : VecInt<[llvm_v2i64_ty], [llvm_i64_ty], + [IntrNoMem]>; + +def int_loongarch_lsx_vinsgr2vr_b + : VecInt<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, ImmArg>]>; +def int_loongarch_lsx_vinsgr2vr_h + : VecInt<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, ImmArg>]>; +def int_loongarch_lsx_vinsgr2vr_w + : VecInt<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, ImmArg>]>; +def int_loongarch_lsx_vinsgr2vr_d + : VecInt<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i64_ty, llvm_i32_ty], + [IntrNoMem, ImmArg>]>; + +def int_loongarch_lsx_vreplve_b + : VecInt<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vreplve_h + : VecInt<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vreplve_w + : VecInt<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lsx_vreplve_d + : VecInt<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; + +foreach inst = ["vpickve2gr_b", "vpickve2gr_bu" ] in + def int_loongarch_lsx_#inst : VecInt<[llvm_i32_ty], + [llvm_v16i8_ty, llvm_i32_ty], + [IntrNoMem, ImmArg>]>; +foreach inst = ["vpickve2gr_h", "vpickve2gr_hu" ] in + def int_loongarch_lsx_#inst : VecInt<[llvm_i32_ty], + [llvm_v8i16_ty, llvm_i32_ty], + [IntrNoMem, ImmArg>]>; +foreach inst = ["vpickve2gr_w", "vpickve2gr_wu" ] in + def int_loongarch_lsx_#inst : VecInt<[llvm_i32_ty], + [llvm_v4i32_ty, llvm_i32_ty], + [IntrNoMem, ImmArg>]>; +foreach inst = ["vpickve2gr_d", "vpickve2gr_du" ] in + def int_loongarch_lsx_#inst : 
VecInt<[llvm_i64_ty], + [llvm_v2i64_ty, llvm_i32_ty], + [IntrNoMem, ImmArg>]>; + +def int_loongarch_lsx_bz_b : VecInt<[llvm_i32_ty], [llvm_v16i8_ty], + [IntrNoMem]>; +def int_loongarch_lsx_bz_h : VecInt<[llvm_i32_ty], [llvm_v8i16_ty], + [IntrNoMem]>; +def int_loongarch_lsx_bz_w : VecInt<[llvm_i32_ty], [llvm_v4i32_ty], + [IntrNoMem]>; +def int_loongarch_lsx_bz_d : VecInt<[llvm_i32_ty], [llvm_v2i64_ty], + [IntrNoMem]>; +def int_loongarch_lsx_bz_v : VecInt<[llvm_i32_ty], [llvm_v16i8_ty], + [IntrNoMem]>; + +def int_loongarch_lsx_bnz_v : VecInt<[llvm_i32_ty], [llvm_v16i8_ty], + [IntrNoMem]>; +def int_loongarch_lsx_bnz_b : VecInt<[llvm_i32_ty], [llvm_v16i8_ty], + [IntrNoMem]>; +def int_loongarch_lsx_bnz_h : VecInt<[llvm_i32_ty], [llvm_v8i16_ty], + [IntrNoMem]>; +def int_loongarch_lsx_bnz_w : VecInt<[llvm_i32_ty], [llvm_v4i32_ty], + [IntrNoMem]>; +def int_loongarch_lsx_bnz_d : VecInt<[llvm_i32_ty], [llvm_v2i64_ty], + [IntrNoMem]>; + +// LSX Float + +foreach inst = ["vfadd_s", "vfsub_s", "vfmul_s", "vfdiv_s", + "vfmax_s", "vfmin_s", "vfmaxa_s", "vfmina_s"] in + def int_loongarch_lsx_#inst : VecInt<[llvm_v4f32_ty], + [llvm_v4f32_ty, llvm_v4f32_ty], + [IntrNoMem]>; +foreach inst = ["vfadd_d", "vfsub_d", "vfmul_d", "vfdiv_d", + "vfmax_d", "vfmin_d", "vfmaxa_d", "vfmina_d"] in + def int_loongarch_lsx_#inst : VecInt<[llvm_v2f64_ty], + [llvm_v2f64_ty, llvm_v2f64_ty], + [IntrNoMem]>; + +foreach inst = ["vfmadd_s", "vfmsub_s", "vfnmadd_s", "vfnmsub_s"] in + def int_loongarch_lsx_#inst + : VecInt<[llvm_v4f32_ty], + [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty], + [IntrNoMem]>; +foreach inst = ["vfmadd_d", "vfmsub_d", "vfnmadd_d", "vfnmsub_d"] in + def int_loongarch_lsx_#inst + : VecInt<[llvm_v2f64_ty], + [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty], + [IntrNoMem]>; + +foreach inst = ["vflogb_s", "vfsqrt_s", "vfrecip_s", "vfrsqrt_s", "vfrint_s", + "vfrintrne_s", "vfrintrz_s", "vfrintrp_s", "vfrintrm_s"] in + def int_loongarch_lsx_#inst : VecInt<[llvm_v4f32_ty], [llvm_v4f32_ty], + [IntrNoMem]>; +foreach inst = ["vflogb_d", "vfsqrt_d", "vfrecip_d", "vfrsqrt_d", "vfrint_d", + "vfrintrne_d", "vfrintrz_d", "vfrintrp_d", "vfrintrm_d"] in + def int_loongarch_lsx_#inst : VecInt<[llvm_v2f64_ty], [llvm_v2f64_ty], + [IntrNoMem]>; + +foreach inst = ["vfcvtl_s_h", "vfcvth_s_h"] in + def int_loongarch_lsx_#inst : VecInt<[llvm_v4f32_ty], [llvm_v8i16_ty], + [IntrNoMem]>; +foreach inst = ["vfcvtl_d_s", "vfcvth_d_s"] in + def int_loongarch_lsx_#inst : VecInt<[llvm_v2f64_ty], [llvm_v4f32_ty], + [IntrNoMem]>; + +foreach inst = ["vftintrne_w_s", "vftintrz_w_s", "vftintrp_w_s", "vftintrm_w_s", + "vftint_w_s", "vftintrz_wu_s", "vftint_wu_s", "vfclass_s"] in + def int_loongarch_lsx_#inst : VecInt<[llvm_v4i32_ty], [llvm_v4f32_ty], + [IntrNoMem]>; +foreach inst = ["vftintrne_l_d", "vftintrz_l_d", "vftintrp_l_d", "vftintrm_l_d", + "vftint_l_d", "vftintrz_lu_d", "vftint_lu_d", "vfclass_d"] in + def int_loongarch_lsx_#inst : VecInt<[llvm_v2i64_ty], [llvm_v2f64_ty], + [IntrNoMem]>; + +foreach inst = ["vftintrnel_l_s", "vftintrneh_l_s", "vftintrzl_l_s", + "vftintrzh_l_s", "vftintrpl_l_s", "vftintrph_l_s", + "vftintrml_l_s", "vftintrmh_l_s", "vftintl_l_s", + "vftinth_l_s"] in + def int_loongarch_lsx_#inst : VecInt<[llvm_v2i64_ty], [llvm_v4f32_ty], + [IntrNoMem]>; + +foreach inst = ["vffint_s_w", "vffint_s_wu"] in + def int_loongarch_lsx_#inst : VecInt<[llvm_v4f32_ty], [llvm_v4i32_ty], + [IntrNoMem]>; +foreach inst = ["vffint_d_l", "vffint_d_lu"] in + def int_loongarch_lsx_#inst : VecInt<[llvm_v2f64_ty], [llvm_v2i64_ty], + [IntrNoMem]>; + 
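Each record these foreach loops expand to becomes an entry in the generated Intrinsic enum (e.g. Intrinsic::loongarch_lsx_vadd_b), and the ClangBuiltin base class maps it onto the matching __builtin_loongarch_lsx_* builtin. IntrNoMem marks the pure operations so identical calls can be CSE'd, while each ImmArg property pins an operand to a compile-time constant. A minimal sketch of emitting one of these intrinsics from middle-end C++ code, assuming an existing Module and IRBuilder and two <16 x i8> operands (the helper name is illustrative, not part of the patch):

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsLoongArch.h"
#include "llvm/IR/Module.h"

// Illustrative helper: build a call to llvm.loongarch.lsx.vadd.b.
// A and B must already be <16 x i8> values.
llvm::Value *emitVAddB(llvm::Module *M, llvm::IRBuilder<> &Builder,
                       llvm::Value *A, llvm::Value *B) {
  llvm::Function *Callee = llvm::Intrinsic::getDeclaration(
      M, llvm::Intrinsic::loongarch_lsx_vadd_b);
  return Builder.CreateCall(Callee, {A, B}); // result is <16 x i8>
}

Since none of these intrinsics is overloaded, getDeclaration needs no explicit type list; the signature comes entirely from the TableGen record.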
+foreach inst = ["vffintl_d_w", "vffinth_d_w"] in + def int_loongarch_lsx_#inst : VecInt<[llvm_v2f64_ty], [llvm_v4i32_ty], + [IntrNoMem]>; + +foreach inst = ["vffint_s_l"] in + def int_loongarch_lsx_#inst : VecInt<[llvm_v4f32_ty], + [llvm_v2i64_ty, llvm_v2i64_ty], + [IntrNoMem]>; +foreach inst = ["vftintrne_w_d", "vftintrz_w_d", "vftintrp_w_d", "vftintrm_w_d", + "vftint_w_d"] in + def int_loongarch_lsx_#inst : VecInt<[llvm_v4i32_ty], + [llvm_v2f64_ty, llvm_v2f64_ty], + [IntrNoMem]>; + +foreach inst = ["vfcvt_h_s"] in + def int_loongarch_lsx_#inst : VecInt<[llvm_v8i16_ty], + [llvm_v4f32_ty, llvm_v4f32_ty], + [IntrNoMem]>; +foreach inst = ["vfcvt_s_d"] in + def int_loongarch_lsx_#inst : VecInt<[llvm_v4f32_ty], + [llvm_v2f64_ty, llvm_v2f64_ty], + [IntrNoMem]>; + +foreach inst = ["vfcmp_caf_s", "vfcmp_cun_s", "vfcmp_ceq_s", "vfcmp_cueq_s", + "vfcmp_clt_s", "vfcmp_cult_s", "vfcmp_cle_s", "vfcmp_cule_s", + "vfcmp_cne_s", "vfcmp_cor_s", "vfcmp_cune_s", + "vfcmp_saf_s", "vfcmp_sun_s", "vfcmp_seq_s", "vfcmp_sueq_s", + "vfcmp_slt_s", "vfcmp_sult_s", "vfcmp_sle_s", "vfcmp_sule_s", + "vfcmp_sne_s", "vfcmp_sor_s", "vfcmp_sune_s"] in + def int_loongarch_lsx_#inst : VecInt<[llvm_v4i32_ty], + [llvm_v4f32_ty, llvm_v4f32_ty], + [IntrNoMem]>; +foreach inst = ["vfcmp_caf_d", "vfcmp_cun_d", "vfcmp_ceq_d", "vfcmp_cueq_d", + "vfcmp_clt_d", "vfcmp_cult_d", "vfcmp_cle_d", "vfcmp_cule_d", + "vfcmp_cne_d", "vfcmp_cor_d", "vfcmp_cune_d", + "vfcmp_saf_d", "vfcmp_sun_d", "vfcmp_seq_d", "vfcmp_sueq_d", + "vfcmp_slt_d", "vfcmp_sult_d", "vfcmp_sle_d", "vfcmp_sule_d", + "vfcmp_sne_d", "vfcmp_sor_d", "vfcmp_sune_d"] in + def int_loongarch_lsx_#inst : VecInt<[llvm_v2i64_ty], + [llvm_v2f64_ty, llvm_v2f64_ty], + [IntrNoMem]>; + +// LSX load/store +def int_loongarch_lsx_vld + : VecInt<[llvm_v16i8_ty], [llvm_ptr_ty, llvm_i32_ty], + [IntrReadMem, IntrArgMemOnly, ImmArg>]>; +def int_loongarch_lsx_vldx + : VecInt<[llvm_v16i8_ty], [llvm_ptr_ty, llvm_i64_ty], + [IntrReadMem, IntrArgMemOnly]>; +def int_loongarch_lsx_vldrepl_b + : VecInt<[llvm_v16i8_ty], [llvm_ptr_ty, llvm_i32_ty], + [IntrReadMem, IntrArgMemOnly, ImmArg>]>; +def int_loongarch_lsx_vldrepl_h + : VecInt<[llvm_v8i16_ty], [llvm_ptr_ty, llvm_i32_ty], + [IntrReadMem, IntrArgMemOnly, ImmArg>]>; +def int_loongarch_lsx_vldrepl_w + : VecInt<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i32_ty], + [IntrReadMem, IntrArgMemOnly, ImmArg>]>; +def int_loongarch_lsx_vldrepl_d + : VecInt<[llvm_v2i64_ty], [llvm_ptr_ty, llvm_i32_ty], + [IntrReadMem, IntrArgMemOnly, ImmArg>]>; + +def int_loongarch_lsx_vst + : VecInt<[], [llvm_v16i8_ty, llvm_ptr_ty, llvm_i32_ty], + [IntrWriteMem, IntrArgMemOnly, ImmArg>]>; +def int_loongarch_lsx_vstx + : VecInt<[], [llvm_v16i8_ty, llvm_ptr_ty, llvm_i64_ty], + [IntrWriteMem, IntrArgMemOnly]>; +def int_loongarch_lsx_vstelm_b + : VecInt<[], [llvm_v16i8_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], + [IntrWriteMem, IntrArgMemOnly, ImmArg>, ImmArg>]>; +def int_loongarch_lsx_vstelm_h + : VecInt<[], [llvm_v8i16_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], + [IntrWriteMem, IntrArgMemOnly, ImmArg>, ImmArg>]>; +def int_loongarch_lsx_vstelm_w + : VecInt<[], [llvm_v4i32_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], + [IntrWriteMem, IntrArgMemOnly, ImmArg>, ImmArg>]>; +def int_loongarch_lsx_vstelm_d + : VecInt<[], [llvm_v2i64_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], + [IntrWriteMem, IntrArgMemOnly, ImmArg>, ImmArg>]>; + +} // TargetPrefix = "loongarch" + +//===----------------------------------------------------------------------===// +// LASX + +let TargetPrefix = "loongarch" in { 
+foreach inst = ["xvadd_b", "xvsub_b", + "xvsadd_b", "xvsadd_bu", "xvssub_b", "xvssub_bu", + "xvavg_b", "xvavg_bu", "xvavgr_b", "xvavgr_bu", + "xvabsd_b", "xvabsd_bu", "xvadda_b", + "xvmax_b", "xvmax_bu", "xvmin_b", "xvmin_bu", + "xvmul_b", "xvmuh_b", "xvmuh_bu", + "xvdiv_b", "xvdiv_bu", "xvmod_b", "xvmod_bu", "xvsigncov_b", + "xvand_v", "xvor_v", "xvxor_v", "xvnor_v", "xvandn_v", "xvorn_v", + "xvsll_b", "xvsrl_b", "xvsra_b", "xvrotr_b", "xvsrlr_b", "xvsrar_b", + "xvbitclr_b", "xvbitset_b", "xvbitrev_b", + "xvseq_b", "xvsle_b", "xvsle_bu", "xvslt_b", "xvslt_bu", + "xvpackev_b", "xvpackod_b", "xvpickev_b", "xvpickod_b", + "xvilvl_b", "xvilvh_b"] in + def int_loongarch_lasx_#inst : VecInt<[llvm_v32i8_ty], + [llvm_v32i8_ty, llvm_v32i8_ty], + [IntrNoMem]>; + +foreach inst = ["xvadd_h", "xvsub_h", + "xvsadd_h", "xvsadd_hu", "xvssub_h", "xvssub_hu", + "xvavg_h", "xvavg_hu", "xvavgr_h", "xvavgr_hu", + "xvabsd_h", "xvabsd_hu", "xvadda_h", + "xvmax_h", "xvmax_hu", "xvmin_h", "xvmin_hu", + "xvmul_h", "xvmuh_h", "xvmuh_hu", + "xvdiv_h", "xvdiv_hu", "xvmod_h", "xvmod_hu", "xvsigncov_h", + "xvsll_h", "xvsrl_h", "xvsra_h", "xvrotr_h", "xvsrlr_h", "xvsrar_h", + "xvbitclr_h", "xvbitset_h", "xvbitrev_h", + "xvseq_h", "xvsle_h", "xvsle_hu", "xvslt_h", "xvslt_hu", + "xvpackev_h", "xvpackod_h", "xvpickev_h", "xvpickod_h", + "xvilvl_h", "xvilvh_h"] in + def int_loongarch_lasx_#inst : VecInt<[llvm_v16i16_ty], + [llvm_v16i16_ty, llvm_v16i16_ty], + [IntrNoMem]>; + +foreach inst = ["xvadd_w", "xvsub_w", + "xvsadd_w", "xvsadd_wu", "xvssub_w", "xvssub_wu", + "xvavg_w", "xvavg_wu", "xvavgr_w", "xvavgr_wu", + "xvabsd_w", "xvabsd_wu", "xvadda_w", + "xvmax_w", "xvmax_wu", "xvmin_w", "xvmin_wu", + "xvmul_w", "xvmuh_w", "xvmuh_wu", + "xvdiv_w", "xvdiv_wu", "xvmod_w", "xvmod_wu", "xvsigncov_w", + "xvsll_w", "xvsrl_w", "xvsra_w", "xvrotr_w", "xvsrlr_w", "xvsrar_w", + "xvbitclr_w", "xvbitset_w", "xvbitrev_w", + "xvseq_w", "xvsle_w", "xvsle_wu", "xvslt_w", "xvslt_wu", + "xvpackev_w", "xvpackod_w", "xvpickev_w", "xvpickod_w", + "xvilvl_w", "xvilvh_w", "xvperm_w"] in + def int_loongarch_lasx_#inst : VecInt<[llvm_v8i32_ty], + [llvm_v8i32_ty, llvm_v8i32_ty], + [IntrNoMem]>; + +foreach inst = ["xvadd_d", "xvadd_q", "xvsub_d", "xvsub_q", + "xvsadd_d", "xvsadd_du", "xvssub_d", "xvssub_du", + "xvhaddw_q_d", "xvhaddw_qu_du", "xvhsubw_q_d", "xvhsubw_qu_du", + "xvaddwev_q_d", "xvaddwod_q_d", "xvsubwev_q_d", "xvsubwod_q_d", + "xvaddwev_q_du", "xvaddwod_q_du", "xvsubwev_q_du", "xvsubwod_q_du", + "xvaddwev_q_du_d", "xvaddwod_q_du_d", + "xvavg_d", "xvavg_du", "xvavgr_d", "xvavgr_du", + "xvabsd_d", "xvabsd_du", "xvadda_d", + "xvmax_d", "xvmax_du", "xvmin_d", "xvmin_du", + "xvmul_d", "xvmuh_d", "xvmuh_du", + "xvmulwev_q_d", "xvmulwod_q_d", "xvmulwev_q_du", "xvmulwod_q_du", + "xvmulwev_q_du_d", "xvmulwod_q_du_d", + "xvdiv_d", "xvdiv_du", "xvmod_d", "xvmod_du", "xvsigncov_d", + "xvsll_d", "xvsrl_d", "xvsra_d", "xvrotr_d", "xvsrlr_d", "xvsrar_d", + "xvbitclr_d", "xvbitset_d", "xvbitrev_d", + "xvseq_d", "xvsle_d", "xvsle_du", "xvslt_d", "xvslt_du", + "xvpackev_d", "xvpackod_d", "xvpickev_d", "xvpickod_d", + "xvilvl_d", "xvilvh_d"] in + def int_loongarch_lasx_#inst : VecInt<[llvm_v4i64_ty], + [llvm_v4i64_ty, llvm_v4i64_ty], + [IntrNoMem]>; + +foreach inst = ["xvaddi_bu", "xvsubi_bu", + "xvmaxi_b", "xvmaxi_bu", "xvmini_b", "xvmini_bu", + "xvsat_b", "xvsat_bu", + "xvandi_b", "xvori_b", "xvxori_b", "xvnori_b", + "xvslli_b", "xvsrli_b", "xvsrai_b", "xvrotri_b", + "xvsrlri_b", "xvsrari_b", + "xvbitclri_b", "xvbitseti_b", "xvbitrevi_b", + "xvseqi_b", 
"xvslei_b", "xvslei_bu", "xvslti_b", "xvslti_bu", + "xvrepl128vei_b", "xvbsll_v", "xvbsrl_v", "xvshuf4i_b"] in + def int_loongarch_lasx_#inst : VecInt<[llvm_v32i8_ty], + [llvm_v32i8_ty, llvm_i32_ty], + [IntrNoMem, ImmArg>]>; +foreach inst = ["xvaddi_hu", "xvsubi_hu", + "xvmaxi_h", "xvmaxi_hu", "xvmini_h", "xvmini_hu", + "xvsat_h", "xvsat_hu", + "xvslli_h", "xvsrli_h", "xvsrai_h", "xvrotri_h", + "xvsrlri_h", "xvsrari_h", + "xvbitclri_h", "xvbitseti_h", "xvbitrevi_h", + "xvseqi_h", "xvslei_h", "xvslei_hu", "xvslti_h", "xvslti_hu", + "xvrepl128vei_h", "xvshuf4i_h"] in + def int_loongarch_lasx_#inst : VecInt<[llvm_v16i16_ty], + [llvm_v16i16_ty, llvm_i32_ty], + [IntrNoMem, ImmArg>]>; +foreach inst = ["xvaddi_wu", "xvsubi_wu", + "xvmaxi_w", "xvmaxi_wu", "xvmini_w", "xvmini_wu", + "xvsat_w", "xvsat_wu", + "xvslli_w", "xvsrli_w", "xvsrai_w", "xvrotri_w", + "xvsrlri_w", "xvsrari_w", + "xvbitclri_w", "xvbitseti_w", "xvbitrevi_w", + "xvseqi_w", "xvslei_w", "xvslei_wu", "xvslti_w", "xvslti_wu", + "xvrepl128vei_w", "xvshuf4i_w", "xvpickve_w"] in + def int_loongarch_lasx_#inst : VecInt<[llvm_v8i32_ty], + [llvm_v8i32_ty, llvm_i32_ty], + [IntrNoMem, ImmArg>]>; +foreach inst = ["xvaddi_du", "xvsubi_du", + "xvmaxi_d", "xvmaxi_du", "xvmini_d", "xvmini_du", + "xvsat_d", "xvsat_du", + "xvslli_d", "xvsrli_d", "xvsrai_d", "xvrotri_d", + "xvsrlri_d", "xvsrari_d", + "xvbitclri_d", "xvbitseti_d", "xvbitrevi_d", + "xvseqi_d", "xvslei_d", "xvslei_du", "xvslti_d", "xvslti_du", + "xvrepl128vei_d", "xvpermi_d", "xvpickve_d"] in + def int_loongarch_lasx_#inst : VecInt<[llvm_v4i64_ty], + [llvm_v4i64_ty, llvm_i32_ty], + [IntrNoMem, ImmArg>]>; + +foreach inst = ["xvhaddw_h_b", "xvhaddw_hu_bu", "xvhsubw_h_b", "xvhsubw_hu_bu", + "xvaddwev_h_b", "xvaddwod_h_b", "xvsubwev_h_b", "xvsubwod_h_b", + "xvaddwev_h_bu", "xvaddwod_h_bu", "xvsubwev_h_bu", "xvsubwod_h_bu", + "xvaddwev_h_bu_b", "xvaddwod_h_bu_b", + "xvmulwev_h_b", "xvmulwod_h_b", "xvmulwev_h_bu", "xvmulwod_h_bu", + "xvmulwev_h_bu_b", "xvmulwod_h_bu_b"] in + def int_loongarch_lasx_#inst : VecInt<[llvm_v16i16_ty], + [llvm_v32i8_ty, llvm_v32i8_ty], + [IntrNoMem]>; + +foreach inst = ["xvhaddw_w_h", "xvhaddw_wu_hu", "xvhsubw_w_h", "xvhsubw_wu_hu", + "xvaddwev_w_h", "xvaddwod_w_h", "xvsubwev_w_h", "xvsubwod_w_h", + "xvaddwev_w_hu", "xvaddwod_w_hu", "xvsubwev_w_hu", "xvsubwod_w_hu", + "xvaddwev_w_hu_h", "xvaddwod_w_hu_h", + "xvmulwev_w_h", "xvmulwod_w_h", "xvmulwev_w_hu", "xvmulwod_w_hu", + "xvmulwev_w_hu_h", "xvmulwod_w_hu_h"] in + def int_loongarch_lasx_#inst : VecInt<[llvm_v8i32_ty], + [llvm_v16i16_ty, llvm_v16i16_ty], + [IntrNoMem]>; + +foreach inst = ["xvhaddw_d_w", "xvhaddw_du_wu", "xvhsubw_d_w", "xvhsubw_du_wu", + "xvaddwev_d_w", "xvaddwod_d_w", "xvsubwev_d_w", "xvsubwod_d_w", + "xvaddwev_d_wu", "xvaddwod_d_wu", "xvsubwev_d_wu", "xvsubwod_d_wu", + "xvaddwev_d_wu_w", "xvaddwod_d_wu_w", + "xvmulwev_d_w", "xvmulwod_d_w", "xvmulwev_d_wu", "xvmulwod_d_wu", + "xvmulwev_d_wu_w", "xvmulwod_d_wu_w"] in + def int_loongarch_lasx_#inst : VecInt<[llvm_v4i64_ty], + [llvm_v8i32_ty, llvm_v8i32_ty], + [IntrNoMem]>; + +foreach inst = ["xvsrln_b_h", "xvsran_b_h", "xvsrlrn_b_h", "xvsrarn_b_h", + "xvssrln_b_h", "xvssran_b_h", "xvssrln_bu_h", "xvssran_bu_h", + "xvssrlrn_b_h", "xvssrarn_b_h", "xvssrlrn_bu_h", "xvssrarn_bu_h"] in + def int_loongarch_lasx_#inst : VecInt<[llvm_v32i8_ty], + [llvm_v16i16_ty, llvm_v16i16_ty], + [IntrNoMem]>; + +foreach inst = ["xvsrln_h_w", "xvsran_h_w", "xvsrlrn_h_w", "xvsrarn_h_w", + "xvssrln_h_w", "xvssran_h_w", "xvssrln_hu_w", "xvssran_hu_w", + "xvssrlrn_h_w", 
"xvssrarn_h_w", "xvssrlrn_hu_w", "xvssrarn_hu_w"] in + def int_loongarch_lasx_#inst : VecInt<[llvm_v16i16_ty], + [llvm_v8i32_ty, llvm_v8i32_ty], + [IntrNoMem]>; + +foreach inst = ["xvsrln_w_d", "xvsran_w_d", "xvsrlrn_w_d", "xvsrarn_w_d", + "xvssrln_w_d", "xvssran_w_d", "xvssrln_wu_d", "xvssran_wu_d", + "xvssrlrn_w_d", "xvssrarn_w_d", "xvssrlrn_wu_d", "xvssrarn_wu_d"] in + def int_loongarch_lasx_#inst : VecInt<[llvm_v8i32_ty], + [llvm_v4i64_ty, llvm_v4i64_ty], + [IntrNoMem]>; + +foreach inst = ["xvmadd_b", "xvmsub_b", "xvfrstp_b", "xvbitsel_v", "xvshuf_b"] in + def int_loongarch_lasx_#inst + : VecInt<[llvm_v32i8_ty], + [llvm_v32i8_ty, llvm_v32i8_ty, llvm_v32i8_ty], + [IntrNoMem]>; +foreach inst = ["xvmadd_h", "xvmsub_h", "xvfrstp_h", "xvshuf_h"] in + def int_loongarch_lasx_#inst + : VecInt<[llvm_v16i16_ty], + [llvm_v16i16_ty, llvm_v16i16_ty, llvm_v16i16_ty], + [IntrNoMem]>; +foreach inst = ["xvmadd_w", "xvmsub_w", "xvshuf_w"] in + def int_loongarch_lasx_#inst + : VecInt<[llvm_v8i32_ty], + [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty], + [IntrNoMem]>; +foreach inst = ["xvmadd_d", "xvmsub_d", "xvshuf_d"] in + def int_loongarch_lasx_#inst + : VecInt<[llvm_v4i64_ty], + [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty], + [IntrNoMem]>; + +foreach inst = ["xvsrlni_b_h", "xvsrani_b_h", "xvsrlrni_b_h", "xvsrarni_b_h", + "xvssrlni_b_h", "xvssrani_b_h", "xvssrlni_bu_h", "xvssrani_bu_h", + "xvssrlrni_b_h", "xvssrarni_b_h", "xvssrlrni_bu_h", "xvssrarni_bu_h", + "xvfrstpi_b", "xvbitseli_b", "xvextrins_b", "xvpermi_q"] in + def int_loongarch_lasx_#inst + : VecInt<[llvm_v32i8_ty], + [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], + [IntrNoMem, ImmArg>]>; +foreach inst = ["xvsrlni_h_w", "xvsrani_h_w", "xvsrlrni_h_w", "xvsrarni_h_w", + "xvssrlni_h_w", "xvssrani_h_w", "xvssrlni_hu_w", "xvssrani_hu_w", + "xvssrlrni_h_w", "xvssrarni_h_w", "xvssrlrni_hu_w", "xvssrarni_hu_w", + "xvfrstpi_h", "xvextrins_h"] in + def int_loongarch_lasx_#inst + : VecInt<[llvm_v16i16_ty], + [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i32_ty], + [IntrNoMem, ImmArg>]>; +foreach inst = ["xvsrlni_w_d", "xvsrani_w_d", "xvsrlrni_w_d", "xvsrarni_w_d", + "xvssrlni_w_d", "xvssrani_w_d", "xvssrlni_wu_d", "xvssrani_wu_d", + "xvssrlrni_w_d", "xvssrarni_w_d", "xvssrlrni_wu_d", "xvssrarni_wu_d", + "xvpermi_w", "xvextrins_w", "xvinsve0_w"] in + def int_loongarch_lasx_#inst + : VecInt<[llvm_v8i32_ty], + [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty], + [IntrNoMem, ImmArg>]>; +foreach inst = ["xvsrlni_d_q", "xvsrani_d_q", "xvsrlrni_d_q", "xvsrarni_d_q", + "xvssrlni_d_q", "xvssrani_d_q", "xvssrlni_du_q", "xvssrani_du_q", + "xvssrlrni_d_q", "xvssrarni_d_q", "xvssrlrni_du_q", "xvssrarni_du_q", + "xvshuf4i_d", "xvextrins_d", "xvinsve0_d"] in + def int_loongarch_lasx_#inst + : VecInt<[llvm_v4i64_ty], + [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty], + [IntrNoMem, ImmArg>]>; + +foreach inst = ["xvmaddwev_h_b", "xvmaddwod_h_b", "xvmaddwev_h_bu", + "xvmaddwod_h_bu", "xvmaddwev_h_bu_b", "xvmaddwod_h_bu_b"] in + def int_loongarch_lasx_#inst + : VecInt<[llvm_v16i16_ty], + [llvm_v16i16_ty, llvm_v32i8_ty, llvm_v32i8_ty], + [IntrNoMem]>; +foreach inst = ["xvmaddwev_w_h", "xvmaddwod_w_h", "xvmaddwev_w_hu", + "xvmaddwod_w_hu", "xvmaddwev_w_hu_h", "xvmaddwod_w_hu_h"] in + def int_loongarch_lasx_#inst + : VecInt<[llvm_v8i32_ty], + [llvm_v8i32_ty, llvm_v16i16_ty, llvm_v16i16_ty], + [IntrNoMem]>; +foreach inst = ["xvmaddwev_d_w", "xvmaddwod_d_w", "xvmaddwev_d_wu", + "xvmaddwod_d_wu", "xvmaddwev_d_wu_w", "xvmaddwod_d_wu_w"] in + def int_loongarch_lasx_#inst + : VecInt<[llvm_v4i64_ty], + 
[llvm_v4i64_ty, llvm_v8i32_ty, llvm_v8i32_ty], + [IntrNoMem]>; +foreach inst = ["xvmaddwev_q_d", "xvmaddwod_q_d", "xvmaddwev_q_du", + "xvmaddwod_q_du", "xvmaddwev_q_du_d", "xvmaddwod_q_du_d"] in + def int_loongarch_lasx_#inst + : VecInt<[llvm_v4i64_ty], + [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty], + [IntrNoMem]>; + +foreach inst = ["xvsllwil_h_b", "xvsllwil_hu_bu"] in + def int_loongarch_lasx_#inst : VecInt<[llvm_v16i16_ty], + [llvm_v32i8_ty, llvm_i32_ty], + [IntrNoMem, ImmArg>]>; +foreach inst = ["xvsllwil_w_h", "xvsllwil_wu_hu"] in + def int_loongarch_lasx_#inst : VecInt<[llvm_v8i32_ty], + [llvm_v16i16_ty, llvm_i32_ty], + [IntrNoMem, ImmArg>]>; +foreach inst = ["xvsllwil_d_w", "xvsllwil_du_wu"] in + def int_loongarch_lasx_#inst : VecInt<[llvm_v4i64_ty], + [llvm_v8i32_ty, llvm_i32_ty], + [IntrNoMem, ImmArg>]>; + +foreach inst = ["xvneg_b", "xvmskltz_b", "xvmskgez_b", "xvmsknz_b", + "xvclo_b", "xvclz_b", "xvpcnt_b", + "xvreplve0_b", "xvreplve0_q"] in + def int_loongarch_lasx_#inst : VecInt<[llvm_v32i8_ty], [llvm_v32i8_ty], + [IntrNoMem]>; +foreach inst = ["xvneg_h", "xvmskltz_h", "xvclo_h", "xvclz_h", "xvpcnt_h", + "xvreplve0_h"] in + def int_loongarch_lasx_#inst : VecInt<[llvm_v16i16_ty], [llvm_v16i16_ty], + [IntrNoMem]>; +foreach inst = ["xvneg_w", "xvmskltz_w", "xvclo_w", "xvclz_w", "xvpcnt_w", + "xvreplve0_w"] in + def int_loongarch_lasx_#inst : VecInt<[llvm_v8i32_ty], [llvm_v8i32_ty], + [IntrNoMem]>; +foreach inst = ["xvneg_d", "xvexth_q_d", "xvexth_qu_du", "xvmskltz_d", + "xvextl_q_d", "xvextl_qu_du", "xvclo_d", "xvclz_d", "xvpcnt_d", + "xvreplve0_d"] in + def int_loongarch_lasx_#inst : VecInt<[llvm_v4i64_ty], [llvm_v4i64_ty], + [IntrNoMem]>; + +foreach inst = ["xvexth_h_b", "xvexth_hu_bu", "vext2xv_h_b", "vext2xv_hu_bu"] in + def int_loongarch_lasx_#inst : VecInt<[llvm_v16i16_ty], [llvm_v32i8_ty], + [IntrNoMem]>; +foreach inst = ["xvexth_w_h", "xvexth_wu_hu", "vext2xv_w_h", "vext2xv_wu_hu"] in + def int_loongarch_lasx_#inst : VecInt<[llvm_v8i32_ty], [llvm_v16i16_ty], + [IntrNoMem]>; +foreach inst = ["xvexth_d_w", "xvexth_du_wu", "vext2xv_d_w", "vext2xv_du_wu"] in + def int_loongarch_lasx_#inst : VecInt<[llvm_v4i64_ty], [llvm_v8i32_ty], + [IntrNoMem]>; + +foreach inst = ["vext2xv_w_b", "vext2xv_wu_bu"] in + def int_loongarch_lasx_#inst : VecInt<[llvm_v8i32_ty], [llvm_v32i8_ty], + [IntrNoMem]>; +foreach inst = ["vext2xv_d_h", "vext2xv_du_hu"] in + def int_loongarch_lasx_#inst : VecInt<[llvm_v4i64_ty], [llvm_v16i16_ty], + [IntrNoMem]>; + +foreach inst = ["vext2xv_d_b", "vext2xv_du_bu"] in + def int_loongarch_lasx_#inst : VecInt<[llvm_v4i64_ty], [llvm_v32i8_ty], + [IntrNoMem]>; + +def int_loongarch_lasx_xvldi : VecInt<[llvm_v4i64_ty], [llvm_i32_ty], + [IntrNoMem, ImmArg>]>; +def int_loongarch_lasx_xvrepli_b : VecInt<[llvm_v32i8_ty], [llvm_i32_ty], + [IntrNoMem, ImmArg>]>; +def int_loongarch_lasx_xvrepli_h : VecInt<[llvm_v16i16_ty], [llvm_i32_ty], + [IntrNoMem, ImmArg>]>; +def int_loongarch_lasx_xvrepli_w : VecInt<[llvm_v8i32_ty], [llvm_i32_ty], + [IntrNoMem, ImmArg>]>; +def int_loongarch_lasx_xvrepli_d : VecInt<[llvm_v4i64_ty], [llvm_i32_ty], + [IntrNoMem, ImmArg>]>; + +def int_loongarch_lasx_xvreplgr2vr_b : VecInt<[llvm_v32i8_ty], [llvm_i32_ty], + [IntrNoMem]>; +def int_loongarch_lasx_xvreplgr2vr_h : VecInt<[llvm_v16i16_ty], [llvm_i32_ty], + [IntrNoMem]>; +def int_loongarch_lasx_xvreplgr2vr_w : VecInt<[llvm_v8i32_ty], [llvm_i32_ty], + [IntrNoMem]>; +def int_loongarch_lasx_xvreplgr2vr_d : VecInt<[llvm_v4i64_ty], [llvm_i64_ty], + [IntrNoMem]>; + +def int_loongarch_lasx_xvinsgr2vr_w + 
: VecInt<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, ImmArg>]>; +def int_loongarch_lasx_xvinsgr2vr_d + : VecInt<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i64_ty, llvm_i32_ty], + [IntrNoMem, ImmArg>]>; + +def int_loongarch_lasx_xvreplve_b + : VecInt<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvreplve_h + : VecInt<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvreplve_w + : VecInt<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_i32_ty], [IntrNoMem]>; +def int_loongarch_lasx_xvreplve_d + : VecInt<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_i32_ty], [IntrNoMem]>; + +foreach inst = ["xvpickve2gr_w", "xvpickve2gr_wu" ] in + def int_loongarch_lasx_#inst : VecInt<[llvm_i32_ty], + [llvm_v8i32_ty, llvm_i32_ty], + [IntrNoMem, ImmArg>]>; +foreach inst = ["xvpickve2gr_d", "xvpickve2gr_du" ] in + def int_loongarch_lasx_#inst : VecInt<[llvm_i64_ty], + [llvm_v4i64_ty, llvm_i32_ty], + [IntrNoMem, ImmArg>]>; + +def int_loongarch_lasx_xbz_b : VecInt<[llvm_i32_ty], [llvm_v32i8_ty], + [IntrNoMem]>; +def int_loongarch_lasx_xbz_h : VecInt<[llvm_i32_ty], [llvm_v16i16_ty], + [IntrNoMem]>; +def int_loongarch_lasx_xbz_w : VecInt<[llvm_i32_ty], [llvm_v8i32_ty], + [IntrNoMem]>; +def int_loongarch_lasx_xbz_d : VecInt<[llvm_i32_ty], [llvm_v4i64_ty], + [IntrNoMem]>; +def int_loongarch_lasx_xbz_v : VecInt<[llvm_i32_ty], [llvm_v32i8_ty], + [IntrNoMem]>; + +def int_loongarch_lasx_xbnz_v : VecInt<[llvm_i32_ty], [llvm_v32i8_ty], + [IntrNoMem]>; +def int_loongarch_lasx_xbnz_b : VecInt<[llvm_i32_ty], [llvm_v32i8_ty], + [IntrNoMem]>; +def int_loongarch_lasx_xbnz_h : VecInt<[llvm_i32_ty], [llvm_v16i16_ty], + [IntrNoMem]>; +def int_loongarch_lasx_xbnz_w : VecInt<[llvm_i32_ty], [llvm_v8i32_ty], + [IntrNoMem]>; +def int_loongarch_lasx_xbnz_d : VecInt<[llvm_i32_ty], [llvm_v4i64_ty], + [IntrNoMem]>; + +// LASX Float + +foreach inst = ["xvfadd_s", "xvfsub_s", "xvfmul_s", "xvfdiv_s", + "xvfmax_s", "xvfmin_s", "xvfmaxa_s", "xvfmina_s"] in + def int_loongarch_lasx_#inst : VecInt<[llvm_v8f32_ty], + [llvm_v8f32_ty, llvm_v8f32_ty], + [IntrNoMem]>; +foreach inst = ["xvfadd_d", "xvfsub_d", "xvfmul_d", "xvfdiv_d", + "xvfmax_d", "xvfmin_d", "xvfmaxa_d", "xvfmina_d"] in + def int_loongarch_lasx_#inst : VecInt<[llvm_v4f64_ty], + [llvm_v4f64_ty, llvm_v4f64_ty], + [IntrNoMem]>; + +foreach inst = ["xvfmadd_s", "xvfmsub_s", "xvfnmadd_s", "xvfnmsub_s"] in + def int_loongarch_lasx_#inst + : VecInt<[llvm_v8f32_ty], + [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty], + [IntrNoMem]>; +foreach inst = ["xvfmadd_d", "xvfmsub_d", "xvfnmadd_d", "xvfnmsub_d"] in + def int_loongarch_lasx_#inst + : VecInt<[llvm_v4f64_ty], + [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty], + [IntrNoMem]>; + +foreach inst = ["xvflogb_s", "xvfsqrt_s", "xvfrecip_s", "xvfrsqrt_s", "xvfrint_s", + "xvfrintrne_s", "xvfrintrz_s", "xvfrintrp_s", "xvfrintrm_s"] in + def int_loongarch_lasx_#inst : VecInt<[llvm_v8f32_ty], [llvm_v8f32_ty], + [IntrNoMem]>; +foreach inst = ["xvflogb_d", "xvfsqrt_d", "xvfrecip_d", "xvfrsqrt_d", "xvfrint_d", + "xvfrintrne_d", "xvfrintrz_d", "xvfrintrp_d", "xvfrintrm_d"] in + def int_loongarch_lasx_#inst : VecInt<[llvm_v4f64_ty], [llvm_v4f64_ty], + [IntrNoMem]>; + +foreach inst = ["xvfcvtl_s_h", "xvfcvth_s_h"] in + def int_loongarch_lasx_#inst : VecInt<[llvm_v8f32_ty], [llvm_v16i16_ty], + [IntrNoMem]>; +foreach inst = ["xvfcvtl_d_s", "xvfcvth_d_s"] in + def int_loongarch_lasx_#inst : VecInt<[llvm_v4f64_ty], [llvm_v8f32_ty], + [IntrNoMem]>; + +foreach inst = ["xvftintrne_w_s", 
"xvftintrz_w_s", "xvftintrp_w_s", "xvftintrm_w_s", + "xvftint_w_s", "xvftintrz_wu_s", "xvftint_wu_s", "xvfclass_s"] in + def int_loongarch_lasx_#inst : VecInt<[llvm_v8i32_ty], [llvm_v8f32_ty], + [IntrNoMem]>; +foreach inst = ["xvftintrne_l_d", "xvftintrz_l_d", "xvftintrp_l_d", "xvftintrm_l_d", + "xvftint_l_d", "xvftintrz_lu_d", "xvftint_lu_d", "xvfclass_d"] in + def int_loongarch_lasx_#inst : VecInt<[llvm_v4i64_ty], [llvm_v4f64_ty], + [IntrNoMem]>; + +foreach inst = ["xvftintrnel_l_s", "xvftintrneh_l_s", "xvftintrzl_l_s", + "xvftintrzh_l_s", "xvftintrpl_l_s", "xvftintrph_l_s", + "xvftintrml_l_s", "xvftintrmh_l_s", "xvftintl_l_s", + "xvftinth_l_s"] in + def int_loongarch_lasx_#inst : VecInt<[llvm_v4i64_ty], [llvm_v8f32_ty], + [IntrNoMem]>; + +foreach inst = ["xvffint_s_w", "xvffint_s_wu"] in + def int_loongarch_lasx_#inst : VecInt<[llvm_v8f32_ty], [llvm_v8i32_ty], + [IntrNoMem]>; +foreach inst = ["xvffint_d_l", "xvffint_d_lu"] in + def int_loongarch_lasx_#inst : VecInt<[llvm_v4f64_ty], [llvm_v4i64_ty], + [IntrNoMem]>; + +foreach inst = ["xvffintl_d_w", "xvffinth_d_w"] in + def int_loongarch_lasx_#inst : VecInt<[llvm_v4f64_ty], [llvm_v8i32_ty], + [IntrNoMem]>; + +foreach inst = ["xvffint_s_l"] in + def int_loongarch_lasx_#inst : VecInt<[llvm_v8f32_ty], + [llvm_v4i64_ty, llvm_v4i64_ty], + [IntrNoMem]>; +foreach inst = ["xvftintrne_w_d", "xvftintrz_w_d", "xvftintrp_w_d", "xvftintrm_w_d", + "xvftint_w_d"] in + def int_loongarch_lasx_#inst : VecInt<[llvm_v8i32_ty], + [llvm_v4f64_ty, llvm_v4f64_ty], + [IntrNoMem]>; + +foreach inst = ["xvfcvt_h_s"] in + def int_loongarch_lasx_#inst : VecInt<[llvm_v16i16_ty], + [llvm_v8f32_ty, llvm_v8f32_ty], + [IntrNoMem]>; +foreach inst = ["xvfcvt_s_d"] in + def int_loongarch_lasx_#inst : VecInt<[llvm_v8f32_ty], + [llvm_v4f64_ty, llvm_v4f64_ty], + [IntrNoMem]>; + +foreach inst = ["xvfcmp_caf_s", "xvfcmp_cun_s", "xvfcmp_ceq_s", "xvfcmp_cueq_s", + "xvfcmp_clt_s", "xvfcmp_cult_s", "xvfcmp_cle_s", "xvfcmp_cule_s", + "xvfcmp_cne_s", "xvfcmp_cor_s", "xvfcmp_cune_s", + "xvfcmp_saf_s", "xvfcmp_sun_s", "xvfcmp_seq_s", "xvfcmp_sueq_s", + "xvfcmp_slt_s", "xvfcmp_sult_s", "xvfcmp_sle_s", "xvfcmp_sule_s", + "xvfcmp_sne_s", "xvfcmp_sor_s", "xvfcmp_sune_s"] in + def int_loongarch_lasx_#inst : VecInt<[llvm_v8i32_ty], + [llvm_v8f32_ty, llvm_v8f32_ty], + [IntrNoMem]>; +foreach inst = ["xvfcmp_caf_d", "xvfcmp_cun_d", "xvfcmp_ceq_d", "xvfcmp_cueq_d", + "xvfcmp_clt_d", "xvfcmp_cult_d", "xvfcmp_cle_d", "xvfcmp_cule_d", + "xvfcmp_cne_d", "xvfcmp_cor_d", "xvfcmp_cune_d", + "xvfcmp_saf_d", "xvfcmp_sun_d", "xvfcmp_seq_d", "xvfcmp_sueq_d", + "xvfcmp_slt_d", "xvfcmp_sult_d", "xvfcmp_sle_d", "xvfcmp_sule_d", + "xvfcmp_sne_d", "xvfcmp_sor_d", "xvfcmp_sune_d"] in + def int_loongarch_lasx_#inst : VecInt<[llvm_v4i64_ty], + [llvm_v4f64_ty, llvm_v4f64_ty], + [IntrNoMem]>; + +def int_loongarch_lasx_xvpickve_w_f + : VecInt<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_i32_ty], + [IntrNoMem, ImmArg>]>; +def int_loongarch_lasx_xvpickve_d_f + : VecInt<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_i32_ty], + [IntrNoMem, ImmArg>]>; + +// LASX load/store +def int_loongarch_lasx_xvld + : VecInt<[llvm_v32i8_ty], [llvm_ptr_ty, llvm_i32_ty], + [IntrReadMem, IntrArgMemOnly, ImmArg>]>; +def int_loongarch_lasx_xvldx + : VecInt<[llvm_v32i8_ty], [llvm_ptr_ty, llvm_i64_ty], + [IntrReadMem, IntrArgMemOnly]>; +def int_loongarch_lasx_xvldrepl_b + : VecInt<[llvm_v32i8_ty], [llvm_ptr_ty, llvm_i32_ty], + [IntrReadMem, IntrArgMemOnly, ImmArg>]>; +def int_loongarch_lasx_xvldrepl_h + : VecInt<[llvm_v16i16_ty], [llvm_ptr_ty, llvm_i32_ty], + 
[IntrReadMem, IntrArgMemOnly, ImmArg>]>; +def int_loongarch_lasx_xvldrepl_w + : VecInt<[llvm_v8i32_ty], [llvm_ptr_ty, llvm_i32_ty], + [IntrReadMem, IntrArgMemOnly, ImmArg>]>; +def int_loongarch_lasx_xvldrepl_d + : VecInt<[llvm_v4i64_ty], [llvm_ptr_ty, llvm_i32_ty], + [IntrReadMem, IntrArgMemOnly, ImmArg>]>; + +def int_loongarch_lasx_xvst + : VecInt<[], [llvm_v32i8_ty, llvm_ptr_ty, llvm_i32_ty], + [IntrWriteMem, IntrArgMemOnly, ImmArg>]>; +def int_loongarch_lasx_xvstx + : VecInt<[], [llvm_v32i8_ty, llvm_ptr_ty, llvm_i64_ty], + [IntrWriteMem, IntrArgMemOnly]>; +def int_loongarch_lasx_xvstelm_b + : VecInt<[], [llvm_v32i8_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], + [IntrWriteMem, IntrArgMemOnly, ImmArg>, ImmArg>]>; +def int_loongarch_lasx_xvstelm_h + : VecInt<[], [llvm_v16i16_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], + [IntrWriteMem, IntrArgMemOnly, ImmArg>, ImmArg>]>; +def int_loongarch_lasx_xvstelm_w + : VecInt<[], [llvm_v8i32_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], + [IntrWriteMem, IntrArgMemOnly, ImmArg>, ImmArg>]>; +def int_loongarch_lasx_xvstelm_d + : VecInt<[], [llvm_v4i64_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], + [IntrWriteMem, IntrArgMemOnly, ImmArg>, ImmArg>]>; } // TargetPrefix = "loongarch" diff --git a/llvm/include/llvm/Option/ArgList.h b/llvm/include/llvm/Option/ArgList.h index 6a07e1c657dcafb9ce12618851ea0f256a059e79..a36f288756c2dae4266f7da911567d61f3f8f9a4 100644 --- a/llvm/include/llvm/Option/ArgList.h +++ b/llvm/include/llvm/Option/ArgList.h @@ -299,6 +299,7 @@ public: /// \p Default if neither option is given. If both the option and its /// negation are present, the last one wins. bool hasFlag(OptSpecifier Pos, OptSpecifier Neg, bool Default) const; + bool hasFlagNoClaim(OptSpecifier Pos, OptSpecifier Neg, bool Default) const; /// hasFlag - Given an option \p Pos, an alias \p PosAlias and its negative /// form \p Neg, return true if the option or its alias is present, false if diff --git a/llvm/include/llvm/TableGen/Record.h b/llvm/include/llvm/TableGen/Record.h index 50df38e695d7b254c157c041d1d7adc63af11b46..15fd2c85f2e153a449911aed343bfaccf1d12f13 100644 --- a/llvm/include/llvm/TableGen/Record.h +++ b/llvm/include/llvm/TableGen/Record.h @@ -784,7 +784,17 @@ public: /// class UnOpInit : public OpInit, public FoldingSetNode { public: - enum UnaryOp : uint8_t { CAST, NOT, HEAD, TAIL, SIZE, EMPTY, GETDAGOP }; + enum UnaryOp : uint8_t { + TOLOWER, + TOUPPER, + CAST, + NOT, + HEAD, + TAIL, + SIZE, + EMPTY, + GETDAGOP + }; private: Init *LHS; diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp index 509cda0cc738b252f2f1d8b248392ba59b35a418..2d686775f667c1af4a215cfa4dab364f5a10327a 100644 --- a/llvm/lib/AsmParser/LLLexer.cpp +++ b/llvm/lib/AsmParser/LLLexer.cpp @@ -570,6 +570,7 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(addrspace); KEYWORD(section); KEYWORD(partition); + KEYWORD(code_model); KEYWORD(alias); KEYWORD(ifunc); KEYWORD(module); diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp index 30b0d917b696e3ad09fb6473567122f7c44175fc..b793f4240d6cbf9fdfe9543595f826a3b6d3c5cb 100644 --- a/llvm/lib/AsmParser/LLParser.cpp +++ b/llvm/lib/AsmParser/LLParser.cpp @@ -1261,7 +1261,13 @@ bool LLParser::parseGlobal(const std::string &Name, LocTy NameLoc, MaybeAlign Alignment; if (parseOptionalAlignment(Alignment)) return true; - GV->setAlignment(Alignment); + if (Alignment) + GV->setAlignment(*Alignment); + } else if (Lex.getKind() == lltok::kw_code_model) { + CodeModel::Model CodeModel; + if 
(parseOptionalCodeModel(CodeModel)) + return true; + GV->setCodeModel(CodeModel); } else if (Lex.getKind() == lltok::MetadataVar) { if (parseGlobalObjectMetadataAttachment(*GV)) return true; @@ -2077,6 +2083,30 @@ bool LLParser::parseOptionalAlignment(MaybeAlign &Alignment, bool AllowParens) { return false; } +/// parseOptionalCodeModel +/// ::= /* empty */ +/// ::= 'code_model' "large" +bool LLParser::parseOptionalCodeModel(CodeModel::Model &model) { + Lex.Lex(); + auto StrVal = Lex.getStrVal(); + auto ErrMsg = "expected global code model string"; + if (StrVal == "tiny") + model = CodeModel::Tiny; + else if (StrVal == "small") + model = CodeModel::Small; + else if (StrVal == "kernel") + model = CodeModel::Kernel; + else if (StrVal == "medium") + model = CodeModel::Medium; + else if (StrVal == "large") + model = CodeModel::Large; + else + return tokError(ErrMsg); + if (parseToken(lltok::StringConstant, ErrMsg)) + return true; + return false; +} + /// parseOptionalDerefAttrBytes /// ::= /* empty */ /// ::= AttrKind '(' 4 ')' diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp index 4e88d10e7f65cde12a173ea4e440b2eae4983a54..68e6509ea6a53392d7d6d8857d4b3612e8349ec9 100644 --- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -1135,6 +1135,23 @@ static bool getDecodedDSOLocal(unsigned Val) { } } +static Optional getDecodedCodeModel(unsigned Val) { + switch (Val) { + case 1: + return CodeModel::Tiny; + case 2: + return CodeModel::Small; + case 3: + return CodeModel::Kernel; + case 4: + return CodeModel::Medium; + case 5: + return CodeModel::Large; + } + + return {}; +} + static GlobalVariable::ThreadLocalMode getDecodedThreadLocalMode(unsigned Val) { switch (Val) { case 0: return GlobalVariable::NotThreadLocal; @@ -3696,6 +3713,7 @@ Error BitcodeReader::parseGlobalVarRecord(ArrayRef Record) { // dllstorageclass, comdat, attributes, preemption specifier, // partition strtab offset, partition strtab size] (name in VST) // v2: [strtab_offset, strtab_size, v1] + // v3: [v2, code_model] StringRef Name; std::tie(Name, Record) = readNameFromStrtab(Record); @@ -3799,6 +3817,13 @@ Error BitcodeReader::parseGlobalVarRecord(ArrayRef Record) { NewGV->setSanitizerMetadata(Meta); } + if (Record.size() > 17 && Record[17]) { + if (auto CM = getDecodedCodeModel(Record[17])) + NewGV->setCodeModel(*CM); + else + return error("Invalid global variable code model"); + } + return Error::success(); } diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp index 5e05ff26647e3d0576e1eb28465980184cccbe6c..548054e9929d1b4590c9af4586e536405c62a82f 100644 --- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -1366,7 +1366,7 @@ void ModuleBitcodeWriter::writeModuleInfo() { // GLOBALVAR: [strtab offset, strtab size, type, isconst, initid, // linkage, alignment, section, visibility, threadlocal, // unnamed_addr, externally_initialized, dllstorageclass, - // comdat, attributes, DSO_Local, GlobalSanitizer] + // comdat, attributes, DSO_Local, GlobalSanitizer, code_model] Vals.push_back(addToStrtab(GV.getName())); Vals.push_back(GV.getName().size()); Vals.push_back(VE.getTypeID(GV.getValueType())); @@ -1383,7 +1383,7 @@ void ModuleBitcodeWriter::writeModuleInfo() { GV.isExternallyInitialized() || GV.getDLLStorageClass() != GlobalValue::DefaultStorageClass || GV.hasComdat() || GV.hasAttributes() || GV.isDSOLocal() || - GV.hasPartition() || 
GV.hasSanitizerMetadata()) { + GV.hasPartition() || GV.hasSanitizerMetadata() || GV.getCodeModel()) { Vals.push_back(getEncodedVisibility(GV)); Vals.push_back(getEncodedThreadLocalMode(GV)); Vals.push_back(getEncodedUnnamedAddr(GV)); @@ -1401,6 +1401,7 @@ void ModuleBitcodeWriter::writeModuleInfo() { Vals.push_back((GV.hasSanitizerMetadata() ? serializeSanitizerMetadata( GV.getSanitizerMetadata()) : 0)); + Vals.push_back(GV.getCodeModelRaw()); } else { AbbrevToUse = SimpleGVarAbbrev; } diff --git a/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/llvm/lib/CodeGen/PrologEpilogInserter.cpp index 441523c3b8790c0231c18888d07458edcd281c70..ede4938ffda0da556a1bca71c105bc03e7c9f656 100644 --- a/llvm/lib/CodeGen/PrologEpilogInserter.cpp +++ b/llvm/lib/CodeGen/PrologEpilogInserter.cpp @@ -671,12 +671,16 @@ void PEI::RecordCalleeSaveRegisterAndOffset(MachineFunction &MF, const std::vect const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); Triple::ArchType archType = TFI->GetArkSupportTarget(); - if ((archType != Triple::aarch64 && archType != Triple::x86_64) || !(TFI->hasFP(MF))) { + if ((archType != Triple::aarch64 && archType != Triple::loongarch64 && + archType != Triple::x86_64) || + !(TFI->hasFP(MF))) { return; } unsigned FpRegDwarfNum = 0; if (archType == Triple::aarch64) { FpRegDwarfNum = 29; // x29 + } else if (archType == Triple::loongarch64) { + FpRegDwarfNum = 22; // r22(fp) } else { FpRegDwarfNum = 6; //rbp } @@ -702,6 +706,7 @@ void PEI::RecordCalleeSaveRegisterAndOffset(MachineFunction &MF, const std::vect } const unsigned LinkRegDwarfNum = 30; + const unsigned LoongArchLinkRegDwarfNum = 1; for (std::vector::const_iterator I = CSI.begin(), E = CSI.end(); I != E; ++I) { int64_t Offset = MFI.getObjectOffset(I->getFrameIdx()); @@ -711,6 +716,11 @@ void PEI::RecordCalleeSaveRegisterAndOffset(MachineFunction &MF, const std::vect && (archType == Triple::aarch64)) { continue; } + if ((DwarfRegNum == LoongArchLinkRegDwarfNum || + DwarfRegNum == FpRegDwarfNum) && + (archType == Triple::loongarch64)) { + continue; + } Offset = Offset - deleta; std::string key = std::string("DwarfReg") + std::to_string(DwarfRegNum); std::string value = std::to_string(Offset); @@ -1015,7 +1025,8 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) { #ifdef ARK_GC_SUPPORT int CalleeSavedFrameSize = 0; Triple::ArchType archType = TFI.GetArkSupportTarget(); - if (archType == Triple::aarch64 && TFI.hasFP(MF)) { + if ((archType == Triple::aarch64 || archType == Triple::loongarch64) && + TFI.hasFP(MF)) { int fpPosition = TFI.GetFixedFpPosition(); int slotSize = sizeof(uint64_t); int fpToCallerSpDelta = 0; @@ -1040,6 +1051,20 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) { // +--------------------------+ // | RBX | // +--------------------------+ + // for loongarch64 + // +--------------------------+ + // | caller Frame | + // +--------------------------+--- + // | callee save registers | ^ + // | (exclude Fp) | | + // | | callee save registers size(fpToCallerSpDelta) + // +--------------------------+ | + // | Fp | V fpPosition = -1 + // +--------------------------+--- FixedCSEnd + // | type | + // +--------------------------+ + // | ReServeSize | + // +--------------------------+ // for ARM64 // +--------------------------+ // | caller Frame | diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp index 54ab007323302cd99781e1ffe1929ae9f67e0aad..8a53887e04f072f4a9c049447913e3a9fe672951 100644 --- 
a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp +++ b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp @@ -991,6 +991,18 @@ uint8_t *RuntimeDyldImpl::createStubFunction(uint8_t *Addr, // and stubs for branches Thumb - ARM and ARM - Thumb. writeBytesUnaligned(0xe51ff004, Addr, 4); // ldr pc, [pc, #-4] return Addr + 4; + } else if (Arch == Triple::loongarch64) { + // lu12i.w $t0, %abs_hi20(addr) + // ori $t0, $t0, %abs_lo12(addr) + // lu32i.d $t0, %abs64_lo20(addr) + // lu52i.d $t0, $t0, %abs64_lo12(addr) + // jr $t0 + writeBytesUnaligned(0x1400000c, Addr, 4); + writeBytesUnaligned(0x0380018c, Addr + 4, 4); + writeBytesUnaligned(0x1600000c, Addr + 8, 4); + writeBytesUnaligned(0x0300018c, Addr + 12, 4); + writeBytesUnaligned(0x4c000180, Addr + 16, 4); + return Addr; } else if (IsMipsO32ABI || IsMipsN32ABI) { // 0: 3c190000 lui t9,%hi(addr). // 4: 27390000 addiu t9,t9,%lo(addr). diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp index c702584b7a33e6d179fb5864cf1b31ab73f6c9c8..6090772e0553d43b365ef3d082ec228073766543 100644 --- a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp +++ b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp @@ -641,6 +641,195 @@ void RuntimeDyldELF::resolveARMRelocation(const SectionEntry &Section, } } +bool RuntimeDyldELF::resolveLoongArch64ShortBranch( + unsigned SectionID, relocation_iterator RelI, + const RelocationValueRef &Value) { + uint64_t Address; + if (Value.SymbolName) { + auto Loc = GlobalSymbolTable.find(Value.SymbolName); + // Don't create direct branch for external symbols. + if (Loc == GlobalSymbolTable.end()) + return false; + const auto &SymInfo = Loc->second; + Address = + uint64_t(Sections[SymInfo.getSectionID()].getLoadAddressWithOffset( + SymInfo.getOffset())); + } else { + Address = uint64_t(Sections[Value.SectionID].getLoadAddress()); + } + uint64_t Offset = RelI->getOffset(); + uint64_t SourceAddress = Sections[SectionID].getLoadAddressWithOffset(Offset); + if (!isInt<28>(Address + Value.Addend - SourceAddress)) + return false; + resolveRelocation(Sections[SectionID], Offset, Address, RelI->getType(), + Value.Addend); + return true; +} + +void RuntimeDyldELF::resolveLoongArch64Branch(unsigned SectionID, + const RelocationValueRef &Value, + relocation_iterator RelI, + StubMap &Stubs) { + LLVM_DEBUG(dbgs() << "\t\tThis is an LoongArch64 branch relocation.\n"); + + if (resolveLoongArch64ShortBranch(SectionID, RelI, Value)) + return; + + SectionEntry &Section = Sections[SectionID]; + uint64_t Offset = RelI->getOffset(); + unsigned RelType = RelI->getType(); + // Look for an existing stub. + StubMap::const_iterator i = Stubs.find(Value); + if (i != Stubs.end()) { + resolveRelocation(Section, Offset, + (uint64_t)Section.getAddressWithOffset(i->second), + RelType, 0); + LLVM_DEBUG(dbgs() << " Stub function found\n"); + return; + } + // Create a new stub function. 
+ LLVM_DEBUG(dbgs() << " Create a new stub function\n"); + Stubs[Value] = Section.getStubOffset(); + uint8_t *StubTargetAddr = + createStubFunction(Section.getAddressWithOffset(Section.getStubOffset())); + RelocationEntry LU12I_W(SectionID, StubTargetAddr - Section.getAddress(), + ELF::R_LARCH_ABS_HI20, Value.Addend); + RelocationEntry ORI(SectionID, StubTargetAddr - Section.getAddress() + 4, + ELF::R_LARCH_ABS_LO12, Value.Addend); + RelocationEntry LU32I_D(SectionID, StubTargetAddr - Section.getAddress() + 8, + ELF::R_LARCH_ABS64_LO20, Value.Addend); + RelocationEntry LU52I_D(SectionID, StubTargetAddr - Section.getAddress() + 12, + ELF::R_LARCH_ABS64_HI12, Value.Addend); + if (Value.SymbolName) { + addRelocationForSymbol(LU12I_W, Value.SymbolName); + addRelocationForSymbol(ORI, Value.SymbolName); + addRelocationForSymbol(LU32I_D, Value.SymbolName); + addRelocationForSymbol(LU52I_D, Value.SymbolName); + } else { + addRelocationForSection(LU12I_W, Value.SectionID); + addRelocationForSection(ORI, Value.SectionID); + addRelocationForSection(LU32I_D, Value.SectionID); + + addRelocationForSection(LU52I_D, Value.SectionID); + } + resolveRelocation(Section, Offset, + reinterpret_cast( + Section.getAddressWithOffset(Section.getStubOffset())), + RelType, 0); + Section.advanceStubOffset(getMaxStubSize()); +} + +// Returns extract bits Val[Hi:Lo]. +static inline uint32_t extractBits(uint64_t Val, uint32_t Hi, uint32_t Lo) { + return Hi == 63 ? Val >> Lo : (Val & (((1ULL << (Hi + 1)) - 1))) >> Lo; +} + +void RuntimeDyldELF::resolveLoongArch64Relocation(const SectionEntry &Section, + uint64_t Offset, + uint64_t Value, uint32_t Type, + int64_t Addend) { + auto *TargetPtr = Section.getAddressWithOffset(Offset); + uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset); + + LLVM_DEBUG(dbgs() << "resolveLoongArch64Relocation, LocalAddress: 0x" + << format("%llx", Section.getAddressWithOffset(Offset)) + << " FinalAddress: 0x" << format("%llx", FinalAddress) + << " Value: 0x" << format("%llx", Value) << " Type: 0x" + << format("%x", Type) << " Addend: 0x" + << format("%llx", Addend) << "\n"); + + switch (Type) { + default: + report_fatal_error("Relocation type not implemented yet!"); + break; + case ELF::R_LARCH_32: + support::ulittle32_t::ref{TargetPtr} = + static_cast(Value + Addend); + break; + case ELF::R_LARCH_64: + support::ulittle64_t::ref{TargetPtr} = Value + Addend; + break; + case ELF::R_LARCH_32_PCREL: + support::ulittle32_t::ref{TargetPtr} = + static_cast(Value + Addend - FinalAddress); + break; + case ELF::R_LARCH_B26: { + uint64_t B26 = (Value + Addend - FinalAddress) >> 2; + auto Instr = support::ulittle32_t::ref(TargetPtr); + uint32_t Imm15_0 = extractBits(B26, /*Hi=*/15, /*Lo=*/0) << 10; + uint32_t Imm25_16 = extractBits(B26, /*Hi=*/25, /*Lo=*/16); + Instr = (Instr & 0xfc000000) | Imm15_0 | Imm25_16; + break; + } + case ELF::R_LARCH_GOT_PC_HI20: + case ELF::R_LARCH_PCALA_HI20: { + uint64_t Target = Value + Addend; + uint64_t TargetPage = + (Target + (Target & 0x800)) & ~static_cast(0xfff); + uint64_t PCPage = FinalAddress & ~static_cast(0xfff); + int64_t PageDelta = TargetPage - PCPage; + auto Instr = support::ulittle32_t::ref(TargetPtr); + uint32_t Imm31_12 = extractBits(PageDelta, /*Hi=*/31, /*Lo=*/12) << 5; + Instr = (Instr & 0xfe00001f) | Imm31_12; + break; + } + case ELF::R_LARCH_GOT_PC_LO12: + case ELF::R_LARCH_PCALA_LO12: { + uint64_t TargetOffset = (Value + Addend) & 0xfff; + auto Instr = support::ulittle32_t::ref(TargetPtr); + uint32_t Imm11_0 = TargetOffset << 10; + Instr 
= (Instr & 0xffc003ff) | Imm11_0; + break; + } + case ELF::R_LARCH_ABS_HI20: { + uint64_t Target = Value + Addend; + auto Instr = support::ulittle32_t::ref(TargetPtr); + uint32_t Imm31_12 = extractBits(Target, /*Hi=*/31, /*Lo=*/12) << 5; + Instr = (Instr & 0xfe00001f) | Imm31_12; + break; + } + case ELF::R_LARCH_ABS_LO12: { + uint64_t Target = Value + Addend; + auto Instr = support::ulittle32_t::ref(TargetPtr); + uint32_t Imm11_0 = extractBits(Target, /*Hi=*/11, /*Lo=*/0) << 10; + Instr = (Instr & 0xffc003ff) | Imm11_0; + break; + } + case ELF::R_LARCH_ABS64_LO20: { + uint64_t Target = Value + Addend; + auto Instr = support::ulittle32_t::ref(TargetPtr); + uint32_t Imm51_32 = extractBits(Target, /*Hi=*/51, /*Lo=*/32) << 5; + Instr = (Instr & 0xfe00001f) | Imm51_32; + break; + } + case ELF::R_LARCH_ABS64_HI12: { + uint64_t Target = Value + Addend; + auto Instr = support::ulittle32_t::ref(TargetPtr); + uint32_t Imm63_52 = extractBits(Target, /*Hi=*/63, /*Lo=*/52) << 10; + Instr = (Instr & 0xffc003ff) | Imm63_52; + break; + } + case ELF::R_LARCH_ADD32: + support::ulittle32_t::ref{TargetPtr} = + (support::ulittle32_t::ref{TargetPtr} + + static_cast(Value + Addend)); + break; + case ELF::R_LARCH_SUB32: + support::ulittle32_t::ref{TargetPtr} = + (support::ulittle32_t::ref{TargetPtr} - + static_cast(Value + Addend)); + break; + case ELF::R_LARCH_ADD64: + support::ulittle64_t::ref{TargetPtr} = + (support::ulittle64_t::ref{TargetPtr} + Value + Addend); + break; + case ELF::R_LARCH_SUB64: + support::ulittle64_t::ref{TargetPtr} = + (support::ulittle64_t::ref{TargetPtr} - Value - Addend); + break; + } +} + void RuntimeDyldELF::setMipsABI(const ObjectFile &Obj) { if (Arch == Triple::UnknownArch || !StringRef(Triple::getArchTypePrefix(Arch)).equals("mips")) { @@ -1057,6 +1246,9 @@ void RuntimeDyldELF::resolveRelocation(const SectionEntry &Section, resolveARMRelocation(Section, Offset, (uint32_t)(Value & 0xffffffffL), Type, (uint32_t)(Addend & 0xffffffffL)); break; + case Triple::loongarch64: + resolveLoongArch64Relocation(Section, Offset, Value, Type, Addend); + break; case Triple::ppc: // Fall through. 
case Triple::ppcle: resolvePPC32Relocation(Section, Offset, Value, Type, Addend); @@ -1369,6 +1561,17 @@ RuntimeDyldELF::processRelocationRef( } processSimpleRelocation(SectionID, Offset, RelType, Value); } + } else if (Arch == Triple::loongarch64) { + if (RelType == ELF::R_LARCH_B26 && MemMgr.allowStubAllocation()) { + resolveLoongArch64Branch(SectionID, Value, RelI, Stubs); + } else if (RelType == ELF::R_LARCH_GOT_PC_HI20 || + RelType == ELF::R_LARCH_GOT_PC_LO12) { + uint64_t GOTOffset = findOrAllocGOTEntry(Value, ELF::R_LARCH_64); + resolveGOTOffsetRelocation(SectionID, Offset, GOTOffset + Addend, + RelType); + } else { + processSimpleRelocation(SectionID, Offset, RelType, Value); + } } else if (IsMipsO32ABI) { uint8_t *Placeholder = reinterpret_cast( computePlaceholderAddress(SectionID, Offset)); @@ -2218,6 +2421,7 @@ size_t RuntimeDyldELF::getGOTEntrySize() { case Triple::x86_64: case Triple::aarch64: case Triple::aarch64_be: + case Triple::loongarch64: case Triple::ppc64: case Triple::ppc64le: case Triple::systemz: @@ -2368,6 +2572,10 @@ bool RuntimeDyldELF::relocationNeedsGot(const RelocationRef &R) const { return RelTy == ELF::R_AARCH64_ADR_GOT_PAGE || RelTy == ELF::R_AARCH64_LD64_GOT_LO12_NC; + if (Arch == Triple::loongarch64) + return RelTy == ELF::R_LARCH_GOT_PC_HI20 || + RelTy == ELF::R_LARCH_GOT_PC_LO12; + if (Arch == Triple::x86_64) return RelTy == ELF::R_X86_64_GOTPCREL || RelTy == ELF::R_X86_64_GOTPCRELX || diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h index 1251036f4caa8e5730afb826bf35f4f1bf5abd61..0f4d4db2e4313d2120a3ea60dc1c25842be766e5 100644 --- a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h +++ b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h @@ -48,6 +48,18 @@ class RuntimeDyldELF : public RuntimeDyldImpl { void resolveARMRelocation(const SectionEntry &Section, uint64_t Offset, uint32_t Value, uint32_t Type, int32_t Addend); + void resolveLoongArch64Relocation(const SectionEntry &Section, + uint64_t Offset, uint64_t Value, + uint32_t Type, int64_t Addend); + + bool resolveLoongArch64ShortBranch(unsigned SectionID, + relocation_iterator RelI, + const RelocationValueRef &Value); + + void resolveLoongArch64Branch(unsigned SectionID, + const RelocationValueRef &Value, + relocation_iterator RelI, StubMap &Stubs); + void resolvePPC32Relocation(const SectionEntry &Section, uint64_t Offset, uint64_t Value, uint32_t Type, int64_t Addend); @@ -69,6 +81,8 @@ class RuntimeDyldELF : public RuntimeDyldImpl { return 16; else if (IsMipsN64ABI) return 32; + if (Arch == Triple::loongarch64) + return 20; // lu12i.w; ori; lu32i.d; lu52i.d; jr else if (Arch == Triple::ppc64 || Arch == Triple::ppc64le) return 44; else if (Arch == Triple::x86_64) diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp index 2d6bb345e465d2aa3ab05b87767d12cc57881366..e11864d0496e20e7d418bf50ede35152429dc1cc 100644 --- a/llvm/lib/IR/AsmWriter.cpp +++ b/llvm/lib/IR/AsmWriter.cpp @@ -3543,6 +3543,27 @@ void AssemblyWriter::printGlobal(const GlobalVariable *GV) { printEscapedString(GV->getPartition(), Out); Out << '"'; } + if (auto CM = GV->getCodeModel()) { + Out << ", code_model \""; + switch (*CM) { + case CodeModel::Tiny: + Out << "tiny"; + break; + case CodeModel::Small: + Out << "small"; + break; + case CodeModel::Kernel: + Out << "kernel"; + break; + case CodeModel::Medium: + Out << "medium"; + break; + case CodeModel::Large: + Out << "large"; + break; + } + Out << '"'; + } using SanitizerMetadata = 
       llvm::GlobalValue::SanitizerMetadata;
   if (GV->hasSanitizerMetadata()) {
diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp
index 5a073632d385bd97b8a2d7ecc8dccf8a066c63e7..377b4d28bbb9a5525ca8d42965f8bda612d1dd82 100644
--- a/llvm/lib/IR/Function.cpp
+++ b/llvm/lib/IR/Function.cpp
@@ -38,6 +38,7 @@
 #include "llvm/IR/IntrinsicsBPF.h"
 #include "llvm/IR/IntrinsicsDirectX.h"
 #include "llvm/IR/IntrinsicsHexagon.h"
+#include "llvm/IR/IntrinsicsLoongArch.h"
 #include "llvm/IR/IntrinsicsMips.h"
 #include "llvm/IR/IntrinsicsNVPTX.h"
 #include "llvm/IR/IntrinsicsPowerPC.h"
diff --git a/llvm/lib/IR/Globals.cpp b/llvm/lib/IR/Globals.cpp
index 51a22897babdb7658c3add76a9b7795f1a40bd59..dca1a00c7d9fc7df750cd96ce3abf76b934b9c8a 100644
--- a/llvm/lib/IR/Globals.cpp
+++ b/llvm/lib/IR/Globals.cpp
@@ -485,6 +485,8 @@ void GlobalVariable::copyAttributesFrom(const GlobalVariable *Src) {
   GlobalObject::copyAttributesFrom(Src);
   setExternallyInitialized(Src->isExternallyInitialized());
   setAttributes(Src->getAttributes());
+  if (auto CM = Src->getCodeModel())
+    setCodeModel(*CM);
 }
 
 void GlobalVariable::dropAllReferences() {
@@ -492,6 +494,15 @@
   clearMetadata();
 }
 
+void GlobalVariable::setCodeModel(CodeModel::Model CM) {
+  unsigned CodeModelData = static_cast<unsigned>(CM) + 1;
+  unsigned OldData = getGlobalValueSubClassData();
+  unsigned NewData = (OldData & ~(CodeModelMask << CodeModelShift)) |
+                     (CodeModelData << CodeModelShift);
+  setGlobalValueSubClassData(NewData);
+  assert(getCodeModel() == CM && "Code model representation error!");
+}
+
 //===----------------------------------------------------------------------===//
 // GlobalAlias Implementation
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Option/ArgList.cpp b/llvm/lib/Option/ArgList.cpp
index fab0fb07cbc8311dd7af6a87d99f219d4edf4980..830b9043d4701de4c5b0cfb46e6ca8a25d41a6c4 100644
--- a/llvm/lib/Option/ArgList.cpp
+++ b/llvm/lib/Option/ArgList.cpp
@@ -76,6 +76,13 @@ bool ArgList::hasFlag(OptSpecifier Pos, OptSpecifier Neg, bool Default) const {
   return Default;
 }
 
+bool ArgList::hasFlagNoClaim(OptSpecifier Pos, OptSpecifier Neg,
+                             bool Default) const {
+  if (Arg *A = getLastArgNoClaim(Pos, Neg))
+    return A->getOption().matches(Pos);
+  return Default;
+}
+
 bool ArgList::hasFlag(OptSpecifier Pos, OptSpecifier PosAlias, OptSpecifier Neg,
                       bool Default) const {
   if (Arg *A = getLastArg(Pos, PosAlias, Neg))
diff --git a/llvm/lib/Support/Host.cpp b/llvm/lib/Support/Host.cpp
index 39b7bdb7eeac27ffeb22db36b1a0b966b3e958bc..4afe29eb250b04275bfec7e51792d09e4fa4410a 100644
--- a/llvm/lib/Support/Host.cpp
+++ b/llvm/lib/Support/Host.cpp
@@ -1405,7 +1405,8 @@ StringRef sys::getHostCPUName() {
   // Use processor id to detect cpu name.
   uint32_t processor_id;
   __asm__("cpucfg %[prid], $zero\n\t" : [prid] "=r"(processor_id));
-  switch (processor_id & 0xff00) {
+  // Refer to PRID_SERIES_MASK in the Linux kernel: arch/loongarch/include/asm/cpu.h.
+  switch (processor_id & 0xf000) {
   case 0xc000: // Loongson 64bit, 4-issue
     return "la464";
   // TODO: Others.
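GlobalVariable::setCodeModel() above packs the model into the global value's spare subclass-data bits, biased by one so that an all-zero field means "no explicit code model". A minimal decode-side sketch (illustrative only: the shift/mask values and names below are placeholders standing in for the private GlobalVariable constants that the setter uses):

```cpp
#include <cstdint>
#include <optional>

// Placeholder constants; the real ones live in GlobalVariable and must
// match the setter shown in Globals.cpp above.
constexpr unsigned CodeModelShift = 2;
constexpr unsigned CodeModelMask = 0x7; // 3 bits cover Tiny..Large

// Mirrors the order of llvm::CodeModel::Model.
enum class Model { Tiny, Small, Kernel, Medium, Large };

std::optional<Model> decodeCodeModel(unsigned SubClassData) {
  unsigned Field = (SubClassData >> CodeModelShift) & CodeModelMask;
  if (Field == 0)
    return std::nullopt; // zero field: no code_model attached
  return static_cast<Model>(Field - 1); // undo the +1 bias from setCodeModel()
}
```

The one-based bias is also why copyAttributesFrom() above only calls setCodeModel(*CM) when getCodeModel() actually returned a value.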
diff --git a/llvm/lib/Support/Unix/Memory.inc b/llvm/lib/Support/Unix/Memory.inc index 5e008069dd9890f77e9611aa48daf1102f985912..856dd1a7ce478b58f865d9343e9f84427de8236b 100644 --- a/llvm/lib/Support/Unix/Memory.inc +++ b/llvm/lib/Support/Unix/Memory.inc @@ -241,7 +241,8 @@ void Memory::InvalidateInstructionCache(const void *Addr, for (intptr_t Line = StartLine; Line < EndLine; Line += LineSize) asm volatile("icbi 0, %0" : : "r"(Line)); asm volatile("isync"); -# elif (defined(__arm__) || defined(__aarch64__) || defined(__mips__)) && \ +#elif (defined(__arm__) || defined(__aarch64__) || defined(__loongarch__) || \ + defined(__mips__)) && \ defined(__GNUC__) // FIXME: Can we safely always call this for __GNUC__ everywhere? const char *Start = static_cast(Addr); diff --git a/llvm/lib/TableGen/Record.cpp b/llvm/lib/TableGen/Record.cpp index 6b899a049e6b0f1d8c3f95a5d8bcec27d169eeef..f3aede4d0e3d276f68684db483680782729f4ee4 100644 --- a/llvm/lib/TableGen/Record.cpp +++ b/llvm/lib/TableGen/Record.cpp @@ -777,6 +777,14 @@ void UnOpInit::Profile(FoldingSetNodeID &ID) const { Init *UnOpInit::Fold(Record *CurRec, bool IsFinal) const { RecordKeeper &RK = getRecordKeeper(); switch (getOpcode()) { + case TOLOWER: + if (StringInit *LHSs = dyn_cast(LHS)) + return StringInit::get(RK, LHSs->getValue().lower()); + break; + case TOUPPER: + if (StringInit *LHSs = dyn_cast(LHS)) + return StringInit::get(RK, LHSs->getValue().upper()); + break; case CAST: if (isa(getType())) { if (StringInit *LHSs = dyn_cast(LHS)) @@ -908,6 +916,12 @@ std::string UnOpInit::getAsString() const { case SIZE: Result = "!size"; break; case EMPTY: Result = "!empty"; break; case GETDAGOP: Result = "!getdagop"; break; + case TOLOWER: + Result = "!tolower"; + break; + case TOUPPER: + Result = "!toupper"; + break; } return Result + "(" + LHS->getAsString() + ")"; } diff --git a/llvm/lib/TableGen/TGLexer.cpp b/llvm/lib/TableGen/TGLexer.cpp index 2a4ee4473b56cb90ba0f3f6698826362afa04093..ab6e13f2eed0261189bbbc19e1c5f258406e7224 100644 --- a/llvm/lib/TableGen/TGLexer.cpp +++ b/llvm/lib/TableGen/TGLexer.cpp @@ -585,6 +585,8 @@ tgtok::TokKind TGLexer::LexExclaim() { .Cases("setdagop", "setop", tgtok::XSetDagOp) // !setop is deprecated. .Cases("getdagop", "getop", tgtok::XGetDagOp) // !getop is deprecated. .Case("exists", tgtok::XExists) + .Case("tolower", tgtok::XToLower) + .Case("toupper", tgtok::XToUpper) .Default(tgtok::Error); return Kind != tgtok::Error ? Kind : ReturnError(Start-1, "Unknown operator"); diff --git a/llvm/lib/TableGen/TGLexer.h b/llvm/lib/TableGen/TGLexer.h index 459ba0f4af6439d694c0c8674fedba4299e5daea..927cb7f16cb7b5a92fce01266130255f5299aeec 100644 --- a/llvm/lib/TableGen/TGLexer.h +++ b/llvm/lib/TableGen/TGLexer.h @@ -29,11 +29,11 @@ class SourceMgr; class Twine; namespace tgtok { - enum TokKind { - // Markers +enum TokKind { + // Markers Eof, Error, - // Tokens with no info. + // Tokens with no info. minus, plus, // - + l_square, r_square, // [ ] l_brace, r_brace, // { } @@ -45,8 +45,8 @@ namespace tgtok { paste, // # dotdotdot, // ... - // Reserved keywords. ('ElseKW' is named to distinguish it from the - // existing 'Else' that means the preprocessor #else.) + // Reserved keywords. ('ElseKW' is named to distinguish it from the + // existing 'Else' that means the preprocessor #else.) 
Assert, Bit, Bits, Class, Code, Dag, Def, Defm, Defset, Defvar, ElseKW, FalseKW, Field, Foreach, If, In, Include, Int, Let, List, MultiClass, String, Then, TrueKW, @@ -56,25 +56,25 @@ namespace tgtok { XListConcat, XListSplat, XStrConcat, XInterleave, XSubstr, XFind, XCast, XSubst, XForEach, XFilter, XFoldl, XHead, XTail, XSize, XEmpty, XIf, XCond, XEq, XIsA, XDag, XNe, XLe, XLt, XGe, XGt, XSetDagOp, XGetDagOp, - XExists, + XExists, XToLower, XToUpper, - // Boolean literals. + // Boolean literals. TrueVal, FalseVal, - // Integer value. - IntVal, + // Integer value. + IntVal, - // Binary constant. Note that these are sized according to the number of - // bits given. - BinaryIntVal, + // Binary constant. Note that these are sized according to the number of + // bits given. + BinaryIntVal, - // String valued tokens. + // String valued tokens. Id, StrVal, VarName, CodeFragment, - // Preprocessing tokens for internal usage by the lexer. - // They are never returned as a result of Lex(). + // Preprocessing tokens for internal usage by the lexer. + // They are never returned as a result of Lex(). Ifdef, Ifndef, Else, Endif, Define - }; +}; } /// TGLexer - TableGen Lexer class. diff --git a/llvm/lib/TableGen/TGParser.cpp b/llvm/lib/TableGen/TGParser.cpp index aab1802c53480c03f37a9d1c465d18a20e17b59d..5f9143f48dc75c49881794a0d4b70244405bab94 100644 --- a/llvm/lib/TableGen/TGParser.cpp +++ b/llvm/lib/TableGen/TGParser.cpp @@ -932,6 +932,8 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) { TokError("unknown bang operator"); return nullptr; case tgtok::XNOT: + case tgtok::XToLower: + case tgtok::XToUpper: case tgtok::XHead: case tgtok::XTail: case tgtok::XSize: @@ -954,6 +956,16 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) { return nullptr; } + break; + case tgtok::XToLower: + Lex.Lex(); // eat the operation + Code = UnOpInit::TOLOWER; + Type = StringRecTy::get(Records); + break; + case tgtok::XToUpper: + Lex.Lex(); // eat the operation + Code = UnOpInit::TOUPPER; + Type = StringRecTy::get(Records); break; case tgtok::XNOT: Lex.Lex(); // eat the operation @@ -2403,6 +2415,8 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType, case tgtok::XSize: case tgtok::XEmpty: case tgtok::XCast: + case tgtok::XToLower: + case tgtok::XToUpper: case tgtok::XGetDagOp: // Value ::= !unop '(' Value ')' case tgtok::XExists: case tgtok::XIsA: diff --git a/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp b/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp index 1e686c2443c2263a38b3259ae932cceba4c7dd1b..731dac74468181cf8be63f6d5da3ace57334ae20 100644 --- a/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp +++ b/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp @@ -122,6 +122,10 @@ class LoongArchAsmParser : public MCTargetAsmParser { // Helper to emit pseudo instruction "li.w/d $rd, $imm". void emitLoadImm(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out); + // Helper to emit pseudo instruction "call36 sym" or "tail36 $rj, sym". 
+ void emitFuncCall36(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, + bool IsTailCall); + public: enum LoongArchMatchResultTy { Match_Dummy = FIRST_TARGET_MATCH_RESULT_TY, @@ -234,12 +238,24 @@ public: VK == LoongArchMCExpr::VK_LoongArch_None; } + bool isUImm1() const { return isUImm<1>(); } bool isUImm2() const { return isUImm<2>(); } bool isUImm2plus1() const { return isUImm<2, 1>(); } bool isUImm3() const { return isUImm<3>(); } + bool isUImm4() const { return isUImm<4>(); } + bool isSImm5() const { return isSImm<5>(); } bool isUImm5() const { return isUImm<5>(); } bool isUImm6() const { return isUImm<6>(); } + bool isUImm7() const { return isUImm<7>(); } + bool isSImm8() const { return isSImm<8>(); } + bool isSImm8lsl1() const { return isSImm<8, 1>(); } + bool isSImm8lsl2() const { return isSImm<8, 2>(); } + bool isSImm8lsl3() const { return isSImm<8, 3>(); } bool isUImm8() const { return isUImm<8>(); } + bool isSImm9lsl3() const { return isSImm<9, 3>(); } + bool isSImm10() const { return isSImm<10>(); } + bool isSImm10lsl2() const { return isSImm<10, 2>(); } + bool isSImm11lsl1() const { return isSImm<11, 1>(); } bool isSImm12() const { return isSImm<12>(); } bool isSImm12addlike() const { @@ -303,6 +319,7 @@ public: IsValidKind; } + bool isSImm13() const { return isSImm<13>(); } bool isUImm14() const { return isUImm<14>(); } bool isUImm15() const { return isUImm<15>(); } @@ -388,6 +405,22 @@ public: IsValidKind; } + bool isSImm20pcaddu18i() const { + if (!isImm()) + return false; + + int64_t Imm; + LoongArchMCExpr::VariantKind VK = LoongArchMCExpr::VK_LoongArch_None; + bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK); + bool IsValidKind = VK == LoongArchMCExpr::VK_LoongArch_None || + VK == LoongArchMCExpr::VK_LoongArch_CALL36; + + return IsConstantImm + ? isInt<20>(Imm) && IsValidKind + : LoongArchAsmParser::classifySymbolRef(getImm(), VK) && + IsValidKind; + } + bool isSImm21lsl2() const { if (!isImm()) return false; @@ -1116,6 +1149,35 @@ void LoongArchAsmParser::emitLoadImm(MCInst &Inst, SMLoc IDLoc, } } +void LoongArchAsmParser::emitFuncCall36(MCInst &Inst, SMLoc IDLoc, + MCStreamer &Out, bool IsTailCall) { + // call36 sym + // expands to: + // pcaddu18i $ra, %call36(sym) + // jirl $ra, $ra, 0 + // + // tail36 $rj, sym + // expands to: + // pcaddu18i $rj, %call36(sym) + // jirl $r0, $rj, 0 + unsigned ScratchReg = + IsTailCall ? Inst.getOperand(0).getReg() : (unsigned)LoongArch::R1; + const MCExpr *Sym = + IsTailCall ? Inst.getOperand(1).getExpr() : Inst.getOperand(0).getExpr(); + const LoongArchMCExpr *LE = LoongArchMCExpr::create( + Sym, llvm::LoongArchMCExpr::VK_LoongArch_CALL36, getContext()); + + Out.emitInstruction( + MCInstBuilder(LoongArch::PCADDU18I).addReg(ScratchReg).addExpr(LE), + getSTI()); + Out.emitInstruction( + MCInstBuilder(LoongArch::JIRL) + .addReg(IsTailCall ? 
(unsigned)LoongArch::R0 : ScratchReg) + .addReg(ScratchReg) + .addImm(0), + getSTI()); +} + bool LoongArchAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, OperandVector &Operands, MCStreamer &Out) { @@ -1164,6 +1226,12 @@ bool LoongArchAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, case LoongArch::PseudoLI_D: emitLoadImm(Inst, IDLoc, Out); return false; + case LoongArch::PseudoCALL36: + emitFuncCall36(Inst, IDLoc, Out, /*IsTailCall=*/false); + return false; + case LoongArch::PseudoTAIL36: + emitFuncCall36(Inst, IDLoc, Out, /*IsTailCall=*/true); + return false; } Out.emitInstruction(Inst, getSTI()); return false; @@ -1318,6 +1386,9 @@ bool LoongArchAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, "$rd must be different from both $rk and $rj"); case Match_RequiresLAORdDifferRj: return Error(Operands[1]->getStartLoc(), "$rd must be different from $rj"); + case Match_InvalidUImm1: + return generateImmOutOfRangeError(Operands, ErrorInfo, /*Lower=*/0, + /*Upper=*/(1 << 1) - 1); case Match_InvalidUImm2: return generateImmOutOfRangeError(Operands, ErrorInfo, /*Lower=*/0, /*Upper=*/(1 << 2) - 1); @@ -1327,12 +1398,21 @@ bool LoongArchAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, case Match_InvalidUImm3: return generateImmOutOfRangeError(Operands, ErrorInfo, /*Lower=*/0, /*Upper=*/(1 << 3) - 1); + case Match_InvalidUImm4: + return generateImmOutOfRangeError(Operands, ErrorInfo, /*Lower=*/0, + /*Upper=*/(1 << 4) - 1); case Match_InvalidUImm5: return generateImmOutOfRangeError(Operands, ErrorInfo, /*Lower=*/0, /*Upper=*/(1 << 5) - 1); case Match_InvalidUImm6: return generateImmOutOfRangeError(Operands, ErrorInfo, /*Lower=*/0, /*Upper=*/(1 << 6) - 1); + case Match_InvalidUImm7: + return generateImmOutOfRangeError(Operands, ErrorInfo, /*Lower=*/0, + /*Upper=*/(1 << 7) - 1); + case Match_InvalidUImm8: + return generateImmOutOfRangeError(Operands, ErrorInfo, /*Lower=*/0, + /*Upper=*/(1 << 8) - 1); case Match_InvalidUImm12: return generateImmOutOfRangeError(Operands, ErrorInfo, /*Lower=*/0, /*Upper=*/(1 << 12) - 1); @@ -1345,6 +1425,39 @@ bool LoongArchAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, case Match_InvalidUImm15: return generateImmOutOfRangeError(Operands, ErrorInfo, /*Lower=*/0, /*Upper=*/(1 << 15) - 1); + case Match_InvalidSImm5: + return generateImmOutOfRangeError(Operands, ErrorInfo, /*Lower=*/-(1 << 4), + /*Upper=*/(1 << 4) - 1); + case Match_InvalidSImm8: + return generateImmOutOfRangeError(Operands, ErrorInfo, /*Lower=*/-(1 << 7), + /*Upper=*/(1 << 7) - 1); + case Match_InvalidSImm8lsl1: + return generateImmOutOfRangeError( + Operands, ErrorInfo, /*Lower=*/-(1 << 8), /*Upper=*/(1 << 8) - 2, + "immediate must be a multiple of 2 in the range"); + case Match_InvalidSImm8lsl2: + return generateImmOutOfRangeError( + Operands, ErrorInfo, /*Lower=*/-(1 << 9), /*Upper=*/(1 << 9) - 4, + "immediate must be a multiple of 4 in the range"); + case Match_InvalidSImm10: + return generateImmOutOfRangeError(Operands, ErrorInfo, /*Lower=*/-(1 << 9), + /*Upper=*/(1 << 9) - 1); + case Match_InvalidSImm8lsl3: + return generateImmOutOfRangeError( + Operands, ErrorInfo, /*Lower=*/-(1 << 10), /*Upper=*/(1 << 10) - 8, + "immediate must be a multiple of 8 in the range"); + case Match_InvalidSImm9lsl3: + return generateImmOutOfRangeError( + Operands, ErrorInfo, /*Lower=*/-(1 << 11), /*Upper=*/(1 << 11) - 8, + "immediate must be a multiple of 8 in the range"); + case Match_InvalidSImm10lsl2: + return generateImmOutOfRangeError( + Operands, 
ErrorInfo, /*Lower=*/-(1 << 11), /*Upper=*/(1 << 11) - 4, + "immediate must be a multiple of 4 in the range"); + case Match_InvalidSImm11lsl1: + return generateImmOutOfRangeError( + Operands, ErrorInfo, /*Lower=*/-(1 << 11), /*Upper=*/(1 << 11) - 2, + "immediate must be a multiple of 2 in the range"); case Match_InvalidSImm12: return generateImmOutOfRangeError(Operands, ErrorInfo, /*Lower=*/-(1 << 11), /*Upper=*/(1 << 11) - 1); @@ -1360,6 +1473,9 @@ bool LoongArchAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, /*Upper=*/(1 << 11) - 1, "operand must be a symbol with modifier (e.g. %pc64_hi12) or an " "integer in the range"); + case Match_InvalidSImm13: + return generateImmOutOfRangeError(Operands, ErrorInfo, /*Lower=*/-(1 << 12), + /*Upper=*/(1 << 12) - 1); case Match_InvalidSImm14lsl2: return generateImmOutOfRangeError( Operands, ErrorInfo, /*Lower=*/-(1 << 15), /*Upper=*/(1 << 15) - 4, @@ -1393,6 +1509,12 @@ bool LoongArchAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, /*Upper=*/(1 << 19) - 1, "operand must be a symbol with modifier (e.g. %pc_hi20) or an integer " "in the range"); + case Match_InvalidSImm20pcaddu18i: + return generateImmOutOfRangeError( + Operands, ErrorInfo, /*Lower=*/-(1 << 19), + /*Upper=*/(1 << 19) - 1, + "operand must be a symbol with modifier (e.g. %call36) or an integer " + "in the range"); case Match_InvalidSImm21lsl2: return generateImmOutOfRangeError( Operands, ErrorInfo, /*Lower=*/-(1 << 22), /*Upper=*/(1 << 22) - 4, diff --git a/llvm/lib/Target/LoongArch/CMakeLists.txt b/llvm/lib/Target/LoongArch/CMakeLists.txt index 41a38f9eb0c5f94c32b6d554497b782ea665d6d7..1fa5fbbb36fe2ac0a2afd5a409ee8663e639e780 100644 --- a/llvm/lib/Target/LoongArch/CMakeLists.txt +++ b/llvm/lib/Target/LoongArch/CMakeLists.txt @@ -1,9 +1,20 @@ add_llvm_component_group(LoongArch) +# OHOS_LOCAL begin +if (BUILD_ARK_GC_SUPPORT) + list(APPEND LLVM_TABLEGEN_FLAGS -DARK_GC_SUPPORT) +endif() +# OHOS_LOCAL end + set(LLVM_TARGET_DEFINITIONS LoongArch.td) tablegen(LLVM LoongArchGenAsmMatcher.inc -gen-asm-matcher) tablegen(LLVM LoongArchGenAsmWriter.inc -gen-asm-writer) +# OHOS_LOCAL begin +if (BUILD_ARK_GC_SUPPORT) + tablegen(LLVM LoongArchGenCallingConv.inc -gen-callingconv) +endif() +# OHOS_LOCAL end tablegen(LLVM LoongArchGenDAGISel.inc -gen-dag-isel) tablegen(LLVM LoongArchGenDisassemblerTables.inc -gen-disassembler) tablegen(LLVM LoongArchGenInstrInfo.inc -gen-instr-info) @@ -26,6 +37,7 @@ add_llvm_target(LoongArchCodeGen LoongArchRegisterInfo.cpp LoongArchSubtarget.cpp LoongArchTargetMachine.cpp + LoongArchTargetTransformInfo.cpp LINK_COMPONENTS Analysis diff --git a/llvm/lib/Target/LoongArch/Disassembler/LoongArchDisassembler.cpp b/llvm/lib/Target/LoongArch/Disassembler/LoongArchDisassembler.cpp index 2335152e5ab17015e8e781ace14958aaf9bb4150..8b20c32783fd7f84af0d07090fbd00386021b8e7 100644 --- a/llvm/lib/Target/LoongArch/Disassembler/LoongArchDisassembler.cpp +++ b/llvm/lib/Target/LoongArch/Disassembler/LoongArchDisassembler.cpp @@ -100,6 +100,24 @@ static DecodeStatus DecodeFCSRRegisterClass(MCInst &Inst, uint64_t RegNo, return MCDisassembler::Success; } +static DecodeStatus DecodeLSX128RegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const MCDisassembler *Decoder) { + if (RegNo >= 32) + return MCDisassembler::Fail; + Inst.addOperand(MCOperand::createReg(LoongArch::VR0 + RegNo)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeLASX256RegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const MCDisassembler 
*Decoder) { + if (RegNo >= 32) + return MCDisassembler::Fail; + Inst.addOperand(MCOperand::createReg(LoongArch::XR0 + RegNo)); + return MCDisassembler::Success; +} + template static DecodeStatus decodeUImmOperand(MCInst &Inst, uint64_t Imm, int64_t Address, diff --git a/llvm/lib/Target/LoongArch/LoongArch.h b/llvm/lib/Target/LoongArch/LoongArch.h index a43c5f111e61a48a730eac151e020e4305e39e32..d4c89e59f22166eb6488c598e4026a4e1f936695 100644 --- a/llvm/lib/Target/LoongArch/LoongArch.h +++ b/llvm/lib/Target/LoongArch/LoongArch.h @@ -36,8 +36,10 @@ bool lowerLoongArchMachineOperandToMCOperand(const MachineOperand &MO, FunctionPass *createLoongArchExpandAtomicPseudoPass(); FunctionPass *createLoongArchISelDag(LoongArchTargetMachine &TM); FunctionPass *createLoongArchPreRAExpandPseudoPass(); +FunctionPass *createLoongArchExpandPseudoPass(); void initializeLoongArchExpandAtomicPseudoPass(PassRegistry &); void initializeLoongArchPreRAExpandPseudoPass(PassRegistry &); +void initializeLoongArchExpandPseudoPass(PassRegistry &); } // end namespace llvm #endif // LLVM_LIB_TARGET_LOONGARCH_LOONGARCH_H diff --git a/llvm/lib/Target/LoongArch/LoongArch.td b/llvm/lib/Target/LoongArch/LoongArch.td index 3e9e8b2519f3131320acce0995b2edd52e62202c..3fd68b18842b0827ec5efb2035e389d55879df35 100644 --- a/llvm/lib/Target/LoongArch/LoongArch.td +++ b/llvm/lib/Target/LoongArch/LoongArch.td @@ -36,57 +36,39 @@ def LA64 : HwMode<"+64bit">; def FeatureBasicF : SubtargetFeature<"f", "HasBasicF", "true", "'F' (Single-Precision Floating-Point)">; -def HasBasicF - : Predicate<"Subtarget->hasBasicF()">, - AssemblerPredicate<(all_of FeatureBasicF), - "'F' (Single-Precision Floating-Point)">; +def HasBasicF : Predicate<"Subtarget->hasBasicF()">; // Double Precision floating point def FeatureBasicD : SubtargetFeature<"d", "HasBasicD", "true", "'D' (Double-Precision Floating-Point)", [FeatureBasicF]>; -def HasBasicD - : Predicate<"Subtarget->hasBasicD()">, - AssemblerPredicate<(all_of FeatureBasicD), - "'D' (Double-Precision Floating-Point)">; +def HasBasicD : Predicate<"Subtarget->hasBasicD()">; // Loongson SIMD eXtension (LSX) def FeatureExtLSX : SubtargetFeature<"lsx", "HasExtLSX", "true", "'LSX' (Loongson SIMD Extension)", [FeatureBasicD]>; -def HasExtLSX - : Predicate<"Subtarget->hasExtLSX()">, - AssemblerPredicate<(all_of FeatureExtLSX), - "'LSX' (Loongson SIMD Extension)">; +def HasExtLSX : Predicate<"Subtarget->hasExtLSX()">; // Loongson Advanced SIMD eXtension (LASX) def FeatureExtLASX : SubtargetFeature<"lasx", "HasExtLASX", "true", "'LASX' (Loongson Advanced SIMD Extension)", [FeatureExtLSX]>; -def HasExtLASX - : Predicate<"Subtarget->hasExtLASX()">, - AssemblerPredicate<(all_of FeatureExtLASX), - "'LASX' (Loongson Advanced SIMD Extension)">; +def HasExtLASX : Predicate<"Subtarget->hasExtLASX()">; // Loongson VirtualiZation (LVZ) def FeatureExtLVZ : SubtargetFeature<"lvz", "HasExtLVZ", "true", "'LVZ' (Loongson Virtualization Extension)">; -def HasExtLVZ - : Predicate<"Subtarget->hasExtLVZ()">, - AssemblerPredicate<(all_of FeatureExtLVZ), - "'LVZ' (Loongson Virtualization Extension)">; +def HasExtLVZ : Predicate<"Subtarget->hasExtLVZ()">; // Loongson Binary Translation (LBT) def FeatureExtLBT : SubtargetFeature<"lbt", "HasExtLBT", "true", "'LBT' (Loongson Binary Translation Extension)">; -def HasExtLBT - : Predicate<"Subtarget->hasExtLBT()">, - AssemblerPredicate<(all_of FeatureExtLBT), - "'LBT' (Loongson Binary Translation Extension)">; +def HasExtLBT : Predicate<"Subtarget->hasExtLBT()">; // Expand la.global as 
la.pcrel def LaGlobalWithPcrel @@ -120,6 +102,11 @@ def FeatureUAL : SubtargetFeature<"ual", "HasUAL", "true", "Allow memory accesses to be unaligned">; +// Experimental auto vectorization +def FeatureAutoVec + : SubtargetFeature<"auto-vec", "HasExpAutoVec", "true", + "Experimental auto vectorization">; + //===----------------------------------------------------------------------===// // Registers, instruction descriptions ... //===----------------------------------------------------------------------===// @@ -155,9 +142,7 @@ def : ProcessorModel<"la464", NoSchedModel, [Feature64Bit, //===----------------------------------------------------------------------===// def LoongArchInstrInfo : InstrInfo { - // guess mayLoad, mayStore, and hasSideEffects - // This option is a temporary migration help. It will go away. - let guessInstructionProperties = 1; + let guessInstructionProperties = 0; } def LoongArchAsmParser : AsmParser { diff --git a/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp b/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp index 04fdd41d677308eac2f4fc7b741f3125f3dfe329..21706d4ca3252fc60600b5fcc7a93aff7ee1ba44 100644 --- a/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp @@ -17,6 +17,8 @@ #include "MCTargetDesc/LoongArchInstPrinter.h" #include "TargetInfo/LoongArchTargetInfo.h" #include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCInstBuilder.h" #include "llvm/MC/TargetRegistry.h" using namespace llvm; @@ -36,6 +38,9 @@ void LoongArchAsmPrinter::emitInstruction(const MachineInstr *MI) { return; switch (MI->getOpcode()) { + case TargetOpcode::STATEPOINT: + LowerSTATEPOINT(*MI); + return; case TargetOpcode::PATCHABLE_FUNCTION_ENTER: LowerPATCHABLE_FUNCTION_ENTER(*MI); return; @@ -46,6 +51,10 @@ void LoongArchAsmPrinter::emitInstruction(const MachineInstr *MI) { EmitToStreamer(*OutStreamer, TmpInst); } +void LoongArchAsmPrinter::emitEndOfAsmFile(Module &M) { + emitStackMaps(SM); +} + bool LoongArchAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, const char *ExtraCode, raw_ostream &OS) { @@ -67,6 +76,20 @@ bool LoongArchAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, return false; } break; + case 'w': // Print LSX registers. + if (MO.getReg().id() >= LoongArch::VR0 && + MO.getReg().id() <= LoongArch::VR31) + break; + // The modifier is 'w' but the operand is not an LSX register; Report an + // unknown operand error. + return true; + case 'u': // Print LASX registers. + if (MO.getReg().id() >= LoongArch::XR0 && + MO.getReg().id() <= LoongArch::XR31) + break; + // The modifier is 'u' but the operand is not an LASX register; Report an + // unknown operand error. + return true; // TODO: handle other extra codes if any. } } @@ -116,6 +139,46 @@ bool LoongArchAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, return false; } +void LoongArchAsmPrinter::LowerSTATEPOINT(const MachineInstr &MI) { + StatepointOpers SOpers(&MI); + if (unsigned PatchBytes = SOpers.getNumPatchBytes()) { + assert(PatchBytes % 4 == 0 && "Invalid number of NOP bytes requested!"); + emitNops(PatchBytes / 4); + } else { + // Lower call target and choose correct opcode. 
+ const MachineOperand &CallTarget = SOpers.getCallTarget(); + MCOperand CallTargetMCOp; + switch (CallTarget.getType()) { + case MachineOperand::MO_GlobalAddress: + case MachineOperand::MO_ExternalSymbol: + lowerOperand(CallTarget, CallTargetMCOp); + EmitToStreamer(*OutStreamer, + MCInstBuilder(LoongArch::BL).addOperand(CallTargetMCOp)); + break; + case MachineOperand::MO_Immediate: + CallTargetMCOp = MCOperand::createImm(CallTarget.getImm()); + EmitToStreamer(*OutStreamer, + MCInstBuilder(LoongArch::BL).addOperand(CallTargetMCOp)); + break; + case MachineOperand::MO_Register: + CallTargetMCOp = MCOperand::createReg(CallTarget.getReg()); + EmitToStreamer(*OutStreamer, MCInstBuilder(LoongArch::JIRL) + .addReg(LoongArch::R1) + .addOperand(CallTargetMCOp) + .addImm(0)); + break; + default: + llvm_unreachable("Unsupported operand type in statepoint call target"); + break; + } + } + + auto &Ctx = OutStreamer->getContext(); + MCSymbol *MILabel = Ctx.createTempSymbol(); + OutStreamer->emitLabel(MILabel); + SM.recordStatepoint(*MILabel, MI); +} + void LoongArchAsmPrinter::LowerPATCHABLE_FUNCTION_ENTER( const MachineInstr &MI) { const Function &F = MF->getFunction(); diff --git a/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.h b/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.h index c8bf657f8de7c3c19f1bc7069854c3f86d419e09..a1c3b7a41e48d62e10d97d4d4639b14ffa9eb198 100644 --- a/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.h +++ b/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.h @@ -15,6 +15,7 @@ #include "LoongArchSubtarget.h" #include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/StackMaps.h" #include "llvm/MC/MCStreamer.h" #include "llvm/Support/Compiler.h" @@ -22,11 +23,13 @@ namespace llvm { class LLVM_LIBRARY_VISIBILITY LoongArchAsmPrinter : public AsmPrinter { const MCSubtargetInfo *STI; + StackMaps SM; public: explicit LoongArchAsmPrinter(TargetMachine &TM, std::unique_ptr Streamer) - : AsmPrinter(TM, std::move(Streamer)), STI(TM.getMCSubtargetInfo()) {} + : AsmPrinter(TM, std::move(Streamer)), STI(TM.getMCSubtargetInfo()), + SM(*this) {} StringRef getPassName() const override { return "LoongArch Assembly Printer"; @@ -35,12 +38,14 @@ public: bool runOnMachineFunction(MachineFunction &MF) override; void emitInstruction(const MachineInstr *MI) override; + void emitEndOfAsmFile(Module &M) override; bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, const char *ExtraCode, raw_ostream &OS) override; bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, const char *ExtraCode, raw_ostream &OS) override; + void LowerSTATEPOINT(const MachineInstr &MI); void LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI); // tblgen'erated function. 
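The statepoint lowering above has two paths: when the statepoint carries patch bytes, no call is emitted at all and PatchBytes/4 nops are reserved instead; otherwise one of two LoongArch call forms is chosen from the call-target operand. A standalone sketch of that dispatch (the enum and function names are invented for illustration, not code from the patch):

```cpp
#include <cassert>
#include <string>

enum class StatepointTarget { Global, ExternalSymbol, Immediate, Register };

// Which LoongArch call instruction the statepoint lowering above selects.
std::string statepointCallForm(StatepointTarget K) {
  switch (K) {
  case StatepointTarget::Global:
  case StatepointTarget::ExternalSymbol:
  case StatepointTarget::Immediate:
    return "bl <target>";        // direct call; links into $ra
  case StatepointTarget::Register:
    return "jirl $ra, <reg>, 0"; // indirect call through the register
  }
  assert(false && "unsupported statepoint call target");
  return {};
}
```

In every non-patchable case the lowering then emits a fresh temporary label immediately after the call and hands it to SM.recordStatepoint(), keying the stackmap entry to the call's return address; emitEndOfAsmFile() later serializes the accumulated records via emitStackMaps(SM).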
diff --git a/llvm/lib/Target/LoongArch/LoongArchCallingConv.td b/llvm/lib/Target/LoongArch/LoongArchCallingConv.td index 9844163163a554d8a1fcd2a68946b71c2ebac07e..363fac19624f8ab363ac734cbeedb234e4a04279 100644 --- a/llvm/lib/Target/LoongArch/LoongArchCallingConv.td +++ b/llvm/lib/Target/LoongArch/LoongArchCallingConv.td @@ -10,6 +10,17 @@ // //===----------------------------------------------------------------------===// +#ifdef ARK_GC_SUPPORT +def CSR_ILP32S_LP64S + : CalleeSavedRegs<(add (sequence "R%u", 23, 31), R1, R22)>; + +def CSR_ILP32F_LP64F + : CalleeSavedRegs<(add (sequence "R%u", 23, 31), (sequence "F%u", 24, 31), R1, R22)>; + +def CSR_ILP32D_LP64D + : CalleeSavedRegs<(add (sequence "R%u", 23, 31), (sequence "F%u_64", 24, 31), R1, R22)>; + +#else def CSR_ILP32S_LP64S : CalleeSavedRegs<(add R1, (sequence "R%u", 22, 31))>; @@ -18,6 +29,41 @@ def CSR_ILP32F_LP64F def CSR_ILP32D_LP64D : CalleeSavedRegs<(add CSR_ILP32S_LP64S, (sequence "F%u_64", 24, 31))>; +#endif // Needed for implementation of LoongArchRegisterInfo::getNoPreservedMask() def CSR_NoRegs : CalleeSavedRegs<(add)>; + +#ifdef ARK_GC_SUPPORT +// The WebKit_JS calling convention only passes the first argument (the callee) +// in register and the remaining arguments on stack. We allow 32bit stack slots, +// so that WebKit can write partial values in the stack and define the other +// 32bit quantity as undef. +def CC_LoongArch_WebKit_JS : CallingConv<[ + // Promote i8/i16 arguments to i32 like x64. + CCIfType<[i8, i16], CCPromoteToType>, + + // Only the first integer argument is passed in register. + CCIfType<[i32, i64], CCAssignToReg<[R4]>>, + + // The remaining integer arguments are passed on the stack. 32bit integer and + // floating-point arguments are aligned to 4 byte and stored in 4 byte slots. + // 64bit integer and floating-point arguments are aligned to 8 byte and stored + // in 8 byte stack slots. + CCIfType<[i32, f32], CCAssignToStack<4, 4>>, + CCIfType<[i64, f64], CCAssignToStack<8, 8>> +]>; + +// Note: +// 1. aarch64 supports 8 return value regs while x64 only supports 1. +// 2. aarch64 supports f32 and f64 while x64 doesn't. +// 3. aarch64 returns i32/i64/f32/f64 via different regs while x64 only use RAX. +def RetCC_LoongArch_WebKit_JS : CallingConv<[ + // Promote all types to i64 like x64. 
+ CCIfType<[i8, i16, i32], CCPromoteToType>, + + CCIfType<[i64], CCAssignToReg<[R4]>>, + CCIfType<[f32], CCAssignToReg<[F0]>>, + CCIfType<[f64], CCAssignToReg<[F0_64]>>, +]>; +#endif diff --git a/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp b/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp index bad39dc3a14fe7ee39db7d45f2244315cc706366..dbe351873332a1188ef42df7cda32d78e9bd003e 100644 --- a/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp @@ -19,13 +19,18 @@ #include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/Register.h" #include "llvm/MC/MCContext.h" #include "llvm/Support/CodeGen.h" +#include "llvm/Support/ErrorHandling.h" using namespace llvm; #define LOONGARCH_PRERA_EXPAND_PSEUDO_NAME \ "LoongArch Pre-RA pseudo instruction expansion pass" +#define LOONGARCH_EXPAND_PSEUDO_NAME \ + "LoongArch pseudo instruction expansion pass" namespace { @@ -75,10 +80,6 @@ private: bool expandLoadAddressTLSGD(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, MachineBasicBlock::iterator &NextMBBI); - bool expandFunctionCALL(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - MachineBasicBlock::iterator &NextMBBI, - bool IsTailCall); }; char LoongArchPreRAExpandPseudo::ID = 0; @@ -121,10 +122,6 @@ bool LoongArchPreRAExpandPseudo::expandMI( return expandLoadAddressTLSLD(MBB, MBBI, NextMBBI); case LoongArch::PseudoLA_TLS_GD: return expandLoadAddressTLSGD(MBB, MBBI, NextMBBI); - case LoongArch::PseudoCALL: - return expandFunctionCALL(MBB, MBBI, NextMBBI, /*IsTailCall=*/false); - case LoongArch::PseudoTAIL: - return expandFunctionCALL(MBB, MBBI, NextMBBI, /*IsTailCall=*/true); } return false; } @@ -189,22 +186,44 @@ bool LoongArchPreRAExpandPseudo::expandLoadAddressTLSLE( // Code Sequence: // lu12i.w $rd, %le_hi20(sym) // ori $rd, $rd, %le_lo12(sym) + // + // And additionally if generating code using the large code model: + // + // lu32i.d $rd, %le64_lo20(sym) + // lu52i.d $rd, $rd, %le64_hi12(sym) MachineFunction *MF = MBB.getParent(); MachineInstr &MI = *MBBI; DebugLoc DL = MI.getDebugLoc(); + bool Large = MF->getTarget().getCodeModel() == CodeModel::Large; Register DestReg = MI.getOperand(0).getReg(); - Register ScratchReg = + Register Parts01 = + Large ? MF->getRegInfo().createVirtualRegister(&LoongArch::GPRRegClass) + : DestReg; + Register Part1 = MF->getRegInfo().createVirtualRegister(&LoongArch::GPRRegClass); MachineOperand &Symbol = MI.getOperand(1); - BuildMI(MBB, MBBI, DL, TII->get(LoongArch::LU12I_W), ScratchReg) + BuildMI(MBB, MBBI, DL, TII->get(LoongArch::LU12I_W), Part1) .addDisp(Symbol, 0, LoongArchII::MO_LE_HI); - BuildMI(MBB, MBBI, DL, TII->get(LoongArch::ORI), DestReg) - .addReg(ScratchReg) + BuildMI(MBB, MBBI, DL, TII->get(LoongArch::ORI), Parts01) + .addReg(Part1, RegState::Kill) .addDisp(Symbol, 0, LoongArchII::MO_LE_LO); + if (Large) { + Register Parts012 = + MF->getRegInfo().createVirtualRegister(&LoongArch::GPRRegClass); + + BuildMI(MBB, MBBI, DL, TII->get(LoongArch::LU32I_D), Parts012) + // "rj" is needed due to InstrInfo pattern requirement. 
+ .addReg(Parts01, RegState::Kill) + .addDisp(Symbol, 0, LoongArchII::MO_LE64_LO); + BuildMI(MBB, MBBI, DL, TII->get(LoongArch::LU52I_D), DestReg) + .addReg(Parts012, RegState::Kill) + .addDisp(Symbol, 0, LoongArchII::MO_LE64_HI); + } + MI.eraseFromParent(); return true; } @@ -248,7 +267,243 @@ bool LoongArchPreRAExpandPseudo::expandLoadAddressTLSGD( SecondOpcode, LoongArchII::MO_GOT_PC_LO); } -bool LoongArchPreRAExpandPseudo::expandFunctionCALL( +class LoongArchExpandPseudo : public MachineFunctionPass { +public: + const LoongArchInstrInfo *TII; + static char ID; + + LoongArchExpandPseudo() : MachineFunctionPass(ID) { + initializeLoongArchExpandPseudoPass(*PassRegistry::getPassRegistry()); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + + StringRef getPassName() const override { + return LOONGARCH_EXPAND_PSEUDO_NAME; + } + +private: + bool expandMBB(MachineBasicBlock &MBB); + bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI); + bool expandLargeAddressLoad(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI, + unsigned LastOpcode, unsigned IdentifyingMO); + bool expandLargeAddressLoad(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI, + unsigned LastOpcode, unsigned IdentifyingMO, + const MachineOperand &Symbol, Register DestReg, + bool EraseFromParent); + bool expandLoadAddressPcrelLarge(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI); + bool expandLoadAddressGotLarge(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI); + bool expandLoadAddressTLSIELarge(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI); + bool expandLoadAddressTLSLDLarge(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI); + bool expandLoadAddressTLSGDLarge(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI); + bool expandFunctionCALL(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI, + bool IsTailCall); +}; + +char LoongArchExpandPseudo::ID = 0; + +bool LoongArchExpandPseudo::runOnMachineFunction(MachineFunction &MF) { + TII = + static_cast(MF.getSubtarget().getInstrInfo()); + + bool Modified = false; + for (auto &MBB : MF) + Modified |= expandMBB(MBB); + + return Modified; +} + +bool LoongArchExpandPseudo::expandMBB(MachineBasicBlock &MBB) { + bool Modified = false; + + MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); + while (MBBI != E) { + MachineBasicBlock::iterator NMBBI = std::next(MBBI); + Modified |= expandMI(MBB, MBBI, NMBBI); + MBBI = NMBBI; + } + + return Modified; +} + +bool LoongArchExpandPseudo::expandMI(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI) { + switch (MBBI->getOpcode()) { + case LoongArch::PseudoLA_PCREL_LARGE: + return expandLoadAddressPcrelLarge(MBB, MBBI, NextMBBI); + case LoongArch::PseudoLA_GOT_LARGE: + return expandLoadAddressGotLarge(MBB, MBBI, NextMBBI); + case LoongArch::PseudoLA_TLS_IE_LARGE: + return expandLoadAddressTLSIELarge(MBB, MBBI, NextMBBI); + case LoongArch::PseudoLA_TLS_LD_LARGE: + return expandLoadAddressTLSLDLarge(MBB, MBBI, NextMBBI); + case LoongArch::PseudoLA_TLS_GD_LARGE: + return expandLoadAddressTLSGDLarge(MBB, MBBI, NextMBBI); 
+ case LoongArch::PseudoCALL: + case LoongArch::PseudoCALL_MEDIUM: + case LoongArch::PseudoCALL_LARGE: + return expandFunctionCALL(MBB, MBBI, NextMBBI, /*IsTailCall=*/false); + case LoongArch::PseudoTAIL: + case LoongArch::PseudoTAIL_MEDIUM: + case LoongArch::PseudoTAIL_LARGE: + return expandFunctionCALL(MBB, MBBI, NextMBBI, /*IsTailCall=*/true); + } + + return false; +} + +bool LoongArchExpandPseudo::expandLargeAddressLoad( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI, unsigned LastOpcode, + unsigned IdentifyingMO) { + MachineInstr &MI = *MBBI; + return expandLargeAddressLoad(MBB, MBBI, NextMBBI, LastOpcode, IdentifyingMO, + MI.getOperand(2), MI.getOperand(0).getReg(), + true); +} + +bool LoongArchExpandPseudo::expandLargeAddressLoad( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI, unsigned LastOpcode, + unsigned IdentifyingMO, const MachineOperand &Symbol, Register DestReg, + bool EraseFromParent) { + // Code Sequence: + // + // Part1: pcalau12i $dst, %MO1(sym) + // Part0: addi.d $t8, $zero, %MO0(sym) + // Part2: lu32i.d $t8, %MO2(sym) + // Part3: lu52i.d $t8, $t8, %MO3(sym) + // Fin: LastOpcode $dst, $t8, $dst + + unsigned MO0, MO1, MO2, MO3; + switch (IdentifyingMO) { + default: + llvm_unreachable("unsupported identifying MO"); + case LoongArchII::MO_PCREL_LO: + MO0 = IdentifyingMO; + MO1 = LoongArchII::MO_PCREL_HI; + MO2 = LoongArchII::MO_PCREL64_LO; + MO3 = LoongArchII::MO_PCREL64_HI; + break; + case LoongArchII::MO_GOT_PC_HI: + case LoongArchII::MO_LD_PC_HI: + case LoongArchII::MO_GD_PC_HI: + // These cases relocate just like the GOT case, except for Part1. + MO0 = LoongArchII::MO_GOT_PC_LO; + MO1 = IdentifyingMO; + MO2 = LoongArchII::MO_GOT_PC64_LO; + MO3 = LoongArchII::MO_GOT_PC64_HI; + break; + case LoongArchII::MO_IE_PC_LO: + MO0 = IdentifyingMO; + MO1 = LoongArchII::MO_IE_PC_HI; + MO2 = LoongArchII::MO_IE_PC64_LO; + MO3 = LoongArchII::MO_IE_PC64_HI; + break; + } + + MachineInstr &MI = *MBBI; + DebugLoc DL = MI.getDebugLoc(); + Register ScratchReg = LoongArch::R20; // $t8 + + assert(MBB.getParent()->getSubtarget().is64Bit() && + "Large code model requires LA64"); + + auto Part1 = BuildMI(MBB, MBBI, DL, TII->get(LoongArch::PCALAU12I), DestReg); + auto Part0 = BuildMI(MBB, MBBI, DL, TII->get(LoongArch::ADDI_D), ScratchReg) + .addReg(LoongArch::R0); + auto Part2 = BuildMI(MBB, MBBI, DL, TII->get(LoongArch::LU32I_D), ScratchReg) + // "rj" is needed due to InstrInfo pattern requirement. + .addReg(ScratchReg); + auto Part3 = BuildMI(MBB, MBBI, DL, TII->get(LoongArch::LU52I_D), ScratchReg) + .addReg(ScratchReg); + BuildMI(MBB, MBBI, DL, TII->get(LastOpcode), DestReg) + .addReg(ScratchReg) + .addReg(DestReg); + + if (Symbol.getType() == MachineOperand::MO_ExternalSymbol) { + const char *SymName = Symbol.getSymbolName(); + Part0.addExternalSymbol(SymName, MO0); + Part1.addExternalSymbol(SymName, MO1); + Part2.addExternalSymbol(SymName, MO2); + Part3.addExternalSymbol(SymName, MO3); + } else { + Part0.addDisp(Symbol, 0, MO0); + Part1.addDisp(Symbol, 0, MO1); + Part2.addDisp(Symbol, 0, MO2); + Part3.addDisp(Symbol, 0, MO3); + } + + if (EraseFromParent) + MI.eraseFromParent(); + + return true; +} + +bool LoongArchExpandPseudo::expandLoadAddressPcrelLarge( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI) { + // Emit the 5-insn large address load sequence with the `%pc` family of + // relocs. 
+ return expandLargeAddressLoad(MBB, MBBI, NextMBBI, LoongArch::ADD_D, + LoongArchII::MO_PCREL_LO); +} + +bool LoongArchExpandPseudo::expandLoadAddressGotLarge( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI) { + // Emit the 5-insn large address load sequence with the `%got_pc` family + // of relocs, loading the result from GOT with `ldx.d` in the end. + return expandLargeAddressLoad(MBB, MBBI, NextMBBI, LoongArch::LDX_D, + LoongArchII::MO_GOT_PC_HI); +} + +bool LoongArchExpandPseudo::expandLoadAddressTLSIELarge( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI) { + // Emit the 5-insn large address load sequence with the `%ie_pc` family + // of relocs, loading the result with `ldx.d` in the end. + return expandLargeAddressLoad(MBB, MBBI, NextMBBI, LoongArch::LDX_D, + LoongArchII::MO_IE_PC_LO); +} + +bool LoongArchExpandPseudo::expandLoadAddressTLSLDLarge( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI) { + // Emit the 5-insn large address load sequence with the `%got_pc` family + // of relocs, with the `pcalau12i` insn relocated with `%ld_pc_hi20`. + return expandLargeAddressLoad(MBB, MBBI, NextMBBI, LoongArch::ADD_D, + LoongArchII::MO_LD_PC_HI); +} + +bool LoongArchExpandPseudo::expandLoadAddressTLSGDLarge( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI) { + // Emit the 5-insn large address load sequence with the `%got_pc` family + // of relocs, with the `pcalau12i` insn relocated with `%gd_pc_hi20`. + return expandLargeAddressLoad(MBB, MBBI, NextMBBI, LoongArch::ADD_D, + LoongArchII::MO_GD_PC_HI); +} + +bool LoongArchExpandPseudo::expandFunctionCALL( MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, MachineBasicBlock::iterator &NextMBBI, bool IsTailCall) { MachineFunction *MF = MBB.getParent(); @@ -273,30 +528,40 @@ bool LoongArchPreRAExpandPseudo::expandFunctionCALL( } case CodeModel::Medium: { // CALL: - // pcalau12i $ra, %pc_hi20(func) - // jirl $ra, $ra, %pc_lo12(func) + // pcaddu18i $ra, %call36(func) + // jirl $ra, $ra, 0 // TAIL: - // pcalau12i $scratch, %pc_hi20(func) - // jirl $r0, $scratch, %pc_lo12(func) + // pcaddu18i $t8, %call36(func) + // jr $t8 Opcode = IsTailCall ? LoongArch::PseudoJIRL_TAIL : LoongArch::PseudoJIRL_CALL; - Register ScratchReg = - IsTailCall - ? MF->getRegInfo().createVirtualRegister(&LoongArch::GPRRegClass) - : LoongArch::R1; + Register ScratchReg = IsTailCall ? 
LoongArch::R20 : LoongArch::R1; MachineInstrBuilder MIB = - BuildMI(MBB, MBBI, DL, TII->get(LoongArch::PCALAU12I), ScratchReg); - CALL = BuildMI(MBB, MBBI, DL, TII->get(Opcode)).addReg(ScratchReg); - if (Func.isSymbol()) { - const char *FnName = Func.getSymbolName(); - MIB.addExternalSymbol(FnName, LoongArchII::MO_PCREL_HI); - CALL.addExternalSymbol(FnName, LoongArchII::MO_PCREL_LO); - break; - } - assert(Func.isGlobal() && "Expected a GlobalValue at this time"); - const GlobalValue *GV = Func.getGlobal(); - MIB.addGlobalAddress(GV, 0, LoongArchII::MO_PCREL_HI); - CALL.addGlobalAddress(GV, 0, LoongArchII::MO_PCREL_LO); + BuildMI(MBB, MBBI, DL, TII->get(LoongArch::PCADDU18I), ScratchReg); + + CALL = + BuildMI(MBB, MBBI, DL, TII->get(Opcode)).addReg(ScratchReg).addImm(0); + + if (Func.isSymbol()) + MIB.addExternalSymbol(Func.getSymbolName(), LoongArchII::MO_CALL36); + else + MIB.addDisp(Func, 0, LoongArchII::MO_CALL36); + break; + } + case CodeModel::Large: { + // Emit the 5-insn large address load sequence, either directly or + // indirectly in case of going through the GOT, then JIRL_TAIL or + // JIRL_CALL to $addr. + Opcode = + IsTailCall ? LoongArch::PseudoJIRL_TAIL : LoongArch::PseudoJIRL_CALL; + Register AddrReg = IsTailCall ? LoongArch::R19 : LoongArch::R1; + + bool UseGOT = Func.isGlobal() && !Func.getGlobal()->isDSOLocal(); + unsigned MO = UseGOT ? LoongArchII::MO_GOT_PC_HI : LoongArchII::MO_PCREL_LO; + unsigned LAOpcode = UseGOT ? LoongArch::LDX_D : LoongArch::ADD_D; + expandLargeAddressLoad(MBB, MBBI, NextMBBI, LAOpcode, MO, Func, AddrReg, + false); + CALL = BuildMI(MBB, MBBI, DL, TII->get(Opcode)).addReg(AddrReg).addImm(0); break; } } @@ -316,10 +581,16 @@ bool LoongArchPreRAExpandPseudo::expandFunctionCALL( INITIALIZE_PASS(LoongArchPreRAExpandPseudo, "loongarch-prera-expand-pseudo", LOONGARCH_PRERA_EXPAND_PSEUDO_NAME, false, false) +INITIALIZE_PASS(LoongArchExpandPseudo, "loongarch-expand-pseudo", + LOONGARCH_EXPAND_PSEUDO_NAME, false, false) + namespace llvm { FunctionPass *createLoongArchPreRAExpandPseudoPass() { return new LoongArchPreRAExpandPseudo(); } +FunctionPass *createLoongArchExpandPseudoPass() { + return new LoongArchExpandPseudo(); +} } // end namespace llvm diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td index 40e7665fb1f7ec1d50a787de0e05196f5fa35dcb..0d6d3f3a13f9bd2284f2b9a7b63a2e4b069091fd 100644 --- a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td @@ -33,93 +33,91 @@ def loongarch_ftint : SDNode<"LoongArchISD::FTINT", SDT_LoongArchFTINT>; let Predicates = [HasBasicF] in { // Arithmetic Operation Instructions -def FADD_S : FP_ALU_3R<0b00000001000000001, "fadd.s", FPR32>; -def FSUB_S : FP_ALU_3R<0b00000001000000101, "fsub.s", FPR32>; -def FMUL_S : FP_ALU_3R<0b00000001000001001, "fmul.s", FPR32>; -def FDIV_S : FP_ALU_3R<0b00000001000001101, "fdiv.s", FPR32>; -def FMADD_S : FP_ALU_4R<0b000010000001, "fmadd.s", FPR32>; -def FMSUB_S : FP_ALU_4R<0b000010000101, "fmsub.s", FPR32>; -def FNMADD_S : FP_ALU_4R<0b000010001001, "fnmadd.s", FPR32>; -def FNMSUB_S : FP_ALU_4R<0b000010001101, "fnmsub.s", FPR32>; -def FMAX_S : FP_ALU_3R<0b00000001000010001, "fmax.s", FPR32>; -def FMIN_S : FP_ALU_3R<0b00000001000010101, "fmin.s", FPR32>; -def FMAXA_S : FP_ALU_3R<0b00000001000011001, "fmaxa.s", FPR32>; -def FMINA_S : FP_ALU_3R<0b00000001000011101, "fmina.s", FPR32>; -def FABS_S : FP_ALU_2R<0b0000000100010100000001, "fabs.s", FPR32>; -def 
FNEG_S : FP_ALU_2R<0b0000000100010100000101, "fneg.s", FPR32>;
-def FSQRT_S : FP_ALU_2R<0b0000000100010100010001, "fsqrt.s", FPR32>;
-def FRECIP_S : FP_ALU_2R<0b0000000100010100010101, "frecip.s", FPR32>;
-def FRSQRT_S : FP_ALU_2R<0b0000000100010100011001, "frsqrt.s", FPR32>;
-def FSCALEB_S : FP_ALU_3R<0b00000001000100001, "fscaleb.s", FPR32>;
-def FLOGB_S : FP_ALU_2R<0b0000000100010100001001, "flogb.s", FPR32>;
-def FCOPYSIGN_S : FP_ALU_3R<0b00000001000100101, "fcopysign.s", FPR32>;
-def FCLASS_S : FP_ALU_2R<0b0000000100010100001101, "fclass.s", FPR32>;
+def FADD_S : FP_ALU_3R<0b00000001000000001, FPR32>;
+def FSUB_S : FP_ALU_3R<0b00000001000000101, FPR32>;
+def FMUL_S : FP_ALU_3R<0b00000001000001001, FPR32>;
+def FDIV_S : FP_ALU_3R<0b00000001000001101, FPR32>;
+def FMADD_S : FP_ALU_4R<0b000010000001, FPR32>;
+def FMSUB_S : FP_ALU_4R<0b000010000101, FPR32>;
+def FNMADD_S : FP_ALU_4R<0b000010001001, FPR32>;
+def FNMSUB_S : FP_ALU_4R<0b000010001101, FPR32>;
+def FMAX_S : FP_ALU_3R<0b00000001000010001, FPR32>;
+def FMIN_S : FP_ALU_3R<0b00000001000010101, FPR32>;
+def FMAXA_S : FP_ALU_3R<0b00000001000011001, FPR32>;
+def FMINA_S : FP_ALU_3R<0b00000001000011101, FPR32>;
+def FABS_S : FP_ALU_2R<0b0000000100010100000001, FPR32>;
+def FNEG_S : FP_ALU_2R<0b0000000100010100000101, FPR32>;
+def FSQRT_S : FP_ALU_2R<0b0000000100010100010001, FPR32>;
+def FRECIP_S : FP_ALU_2R<0b0000000100010100010101, FPR32>;
+def FRSQRT_S : FP_ALU_2R<0b0000000100010100011001, FPR32>;
+def FSCALEB_S : FP_ALU_3R<0b00000001000100001, FPR32>;
+def FLOGB_S : FP_ALU_2R<0b0000000100010100001001, FPR32>;
+def FCOPYSIGN_S : FP_ALU_3R<0b00000001000100101, FPR32>;
+def FCLASS_S : FP_ALU_2R<0b0000000100010100001101, FPR32>;
 
 // Comparison Instructions
-def FCMP_CAF_S : FP_CMP;
-def FCMP_CUN_S : FP_CMP;
-def FCMP_CEQ_S : FP_CMP;
-def FCMP_CUEQ_S : FP_CMP;
-def FCMP_CLT_S : FP_CMP;
-def FCMP_CULT_S : FP_CMP;
-def FCMP_CLE_S : FP_CMP;
-def FCMP_CULE_S : FP_CMP;
-def FCMP_CNE_S : FP_CMP;
-def FCMP_COR_S : FP_CMP;
-def FCMP_CUNE_S : FP_CMP;
-def FCMP_SAF_S : FP_CMP;
-def FCMP_SUN_S : FP_CMP;
-def FCMP_SEQ_S : FP_CMP;
-def FCMP_SUEQ_S : FP_CMP;
-def FCMP_SLT_S : FP_CMP;
-def FCMP_SULT_S : FP_CMP;
-def FCMP_SLE_S : FP_CMP;
-def FCMP_SULE_S : FP_CMP;
-def FCMP_SNE_S : FP_CMP;
-def FCMP_SOR_S : FP_CMP;
-def FCMP_SUNE_S : FP_CMP;
+def FCMP_CAF_S : FP_CMP;
+def FCMP_CUN_S : FP_CMP;
+def FCMP_CEQ_S : FP_CMP;
+def FCMP_CUEQ_S : FP_CMP;
+def FCMP_CLT_S : FP_CMP;
+def FCMP_CULT_S : FP_CMP;
+def FCMP_CLE_S : FP_CMP;
+def FCMP_CULE_S : FP_CMP;
+def FCMP_CNE_S : FP_CMP;
+def FCMP_COR_S : FP_CMP;
+def FCMP_CUNE_S : FP_CMP;
+def FCMP_SAF_S : FP_CMP;
+def FCMP_SUN_S : FP_CMP;
+def FCMP_SEQ_S : FP_CMP;
+def FCMP_SUEQ_S : FP_CMP;
+def FCMP_SLT_S : FP_CMP;
+def FCMP_SULT_S : FP_CMP;
+def FCMP_SLE_S : FP_CMP;
+def FCMP_SULE_S : FP_CMP;
+def FCMP_SNE_S : FP_CMP;
+def FCMP_SOR_S : FP_CMP;
+def FCMP_SUNE_S : FP_CMP;
 
 // Conversion Instructions
-def FFINT_S_W : FP_CONV<0b0000000100011101000100, "ffint.s.w", FPR32, FPR32>;
-def FTINT_W_S : FP_CONV<0b0000000100011011000001, "ftint.w.s", FPR32, FPR32>;
-def FTINTRM_W_S : FP_CONV<0b0000000100011010000001, "ftintrm.w.s", FPR32,
-                          FPR32>;
-def FTINTRP_W_S : FP_CONV<0b0000000100011010010001, "ftintrp.w.s", FPR32,
-                          FPR32>;
-def FTINTRZ_W_S : FP_CONV<0b0000000100011010100001, "ftintrz.w.s", FPR32,
-                          FPR32>;
-def FTINTRNE_W_S : FP_CONV<0b0000000100011010110001, "ftintrne.w.s", FPR32,
-                           FPR32>;
-def FRINT_S : FP_CONV<0b0000000100011110010001, "frint.s", FPR32, FPR32>;
+def FFINT_S_W : FP_CONV<0b0000000100011101000100, FPR32, FPR32>;
+def FTINT_W_S : FP_CONV<0b0000000100011011000001, FPR32, FPR32>;
+def FTINTRM_W_S : FP_CONV<0b0000000100011010000001, FPR32, FPR32>;
+def FTINTRP_W_S : FP_CONV<0b0000000100011010010001, FPR32, FPR32>;
+def FTINTRZ_W_S : FP_CONV<0b0000000100011010100001, FPR32, FPR32>;
+def FTINTRNE_W_S : FP_CONV<0b0000000100011010110001, FPR32, FPR32>;
+def FRINT_S : FP_CONV<0b0000000100011110010001, FPR32, FPR32>;
 
 // Move Instructions
-def FSEL_S : FP_SEL<0b00001101000000, "fsel", FPR32>;
-def FMOV_S : FP_MOV<0b0000000100010100100101, "fmov.s", FPR32, FPR32>;
-def MOVGR2FR_W : FP_MOV<0b0000000100010100101001, "movgr2fr.w", FPR32, GPR>;
-def MOVFR2GR_S : FP_MOV<0b0000000100010100101101, "movfr2gr.s", GPR, FPR32>;
-def MOVGR2FCSR : FP_MOV<0b0000000100010100110000, "movgr2fcsr", FCSR, GPR>;
-def MOVFCSR2GR : FP_MOV<0b0000000100010100110010, "movfcsr2gr", GPR, FCSR>;
-def MOVFR2CF_S : FP_MOV<0b0000000100010100110100, "movfr2cf", CFR, FPR32>;
-def MOVCF2FR_S : FP_MOV<0b0000000100010100110101, "movcf2fr", FPR32, CFR>;
-def MOVGR2CF : FP_MOV<0b0000000100010100110110, "movgr2cf", CFR, GPR>;
-def MOVCF2GR : FP_MOV<0b0000000100010100110111, "movcf2gr", GPR, CFR>;
+def FSEL_xS : FP_SEL<0b00001101000000, FPR32>;
+def FMOV_S : FP_MOV<0b0000000100010100100101, FPR32, FPR32>;
+def MOVGR2FR_W : FP_MOV<0b0000000100010100101001, FPR32, GPR>;
+def MOVFR2GR_S : FP_MOV<0b0000000100010100101101, GPR, FPR32>;
+let hasSideEffects = 1 in {
+def MOVGR2FCSR : FP_MOV<0b0000000100010100110000, FCSR, GPR>;
+def MOVFCSR2GR : FP_MOV<0b0000000100010100110010, GPR, FCSR>;
+} // hasSideEffects = 1
+def MOVFR2CF_xS : FP_MOV<0b0000000100010100110100, CFR, FPR32>;
+def MOVCF2FR_xS : FP_MOV<0b0000000100010100110101, FPR32, CFR>;
+def MOVGR2CF : FP_MOV<0b0000000100010100110110, CFR, GPR>;
+def MOVCF2GR : FP_MOV<0b0000000100010100110111, GPR, CFR>;
 
 // Branch Instructions
-def BCEQZ : FP_BRANCH<0b01001000, "bceqz">;
-def BCNEZ : FP_BRANCH<0b01001001, "bcnez">;
+def BCEQZ : FP_BRANCH<0b01001000>;
+def BCNEZ : FP_BRANCH<0b01001001>;
 
 // Common Memory Access Instructions
-def FLD_S : FP_LOAD_2RI12<0b0010101100, "fld.s", FPR32>;
-def FST_S : FP_STORE_2RI12<0b0010101101, "fst.s", FPR32>;
-def FLDX_S : FP_LOAD_3R<0b00111000001100000, "fldx.s", FPR32>;
-def FSTX_S : FP_STORE_3R<0b00111000001110000, "fstx.s", FPR32>;
+def FLD_S : FP_LOAD_2RI12<0b0010101100, FPR32>;
+def FST_S : FP_STORE_2RI12<0b0010101101, FPR32>;
+def FLDX_S : FP_LOAD_3R<0b00111000001100000, FPR32>;
+def FSTX_S : FP_STORE_3R<0b00111000001110000, FPR32>;
 
 // Bound Check Memory Access Instructions
-def FLDGT_S : FP_LOAD_3R<0b00111000011101000, "fldgt.s", FPR32>;
-def FLDLE_S : FP_LOAD_3R<0b00111000011101010, "fldle.s", FPR32>;
-def FSTGT_S : FP_STORE_3R<0b00111000011101100, "fstgt.s", FPR32>;
-def FSTLE_S : FP_STORE_3R<0b00111000011101110, "fstle.s", FPR32>;
+def FLDGT_S : FP_LOAD_3R<0b00111000011101000, FPR32>;
+def FLDLE_S : FP_LOAD_3R<0b00111000011101010, FPR32>;
+def FSTGT_S : FP_STORE_3R<0b00111000011101100, FPR32>;
+def FSTLE_S : FP_STORE_3R<0b00111000011101110, FPR32>;
 
 // Pseudo instructions for spill/reload CFRs.
 let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in
@@ -223,7 +221,7 @@ def : PatStrictFsetccs;
 
 /// Select
 
 def : Pat<(select CFR:$cc, FPR32:$fk, FPR32:$fj),
-          (FSEL_S FPR32:$fj, FPR32:$fk, CFR:$cc)>;
+          (FSEL_xS FPR32:$fj, FPR32:$fk, CFR:$cc)>;
 
 /// Selectcc
 
@@ -231,16 +229,16 @@ class PatFPSelectcc
   : Pat<(select (GRLenVT (setcc RegTy:$a, RegTy:$b, cc)), RegTy:$t, RegTy:$f),
         (SelInst RegTy:$f, RegTy:$t, (CmpInst RegTy:$a, RegTy:$b))>;
-def : PatFPSelectcc;
-def : PatFPSelectcc;
-def : PatFPSelectcc;
-def : PatFPSelectcc;
-def : PatFPSelectcc;
-def : PatFPSelectcc;
-def : PatFPSelectcc;
-def : PatFPSelectcc;
-def : PatFPSelectcc;
-def : PatFPSelectcc;
+def : PatFPSelectcc;
+def : PatFPSelectcc;
+def : PatFPSelectcc;
+def : PatFPSelectcc;
+def : PatFPSelectcc;
+def : PatFPSelectcc;
+def : PatFPSelectcc;
+def : PatFPSelectcc;
+def : PatFPSelectcc;
+def : PatFPSelectcc;
 
 /// Loads
 
@@ -279,8 +277,12 @@ def : Pat<(fneg (fma FPR32:$fj, FPR32:$fk, FPR32:$fa)),
 def : Pat<(fma_nsz (fneg FPR32:$fj), FPR32:$fk, (fneg FPR32:$fa)),
           (FNMADD_S FPR32:$fj, FPR32:$fk, FPR32:$fa)>;
 
-// fnmsub.s: -fj * fk + fa
-def : Pat<(fma (fneg FPR32:$fj), FPR32:$fk, FPR32:$fa),
+// fnmsub.s: -(fj * fk - fa)
+def : Pat<(fneg (fma FPR32:$fj, FPR32:$fk, (fneg FPR32:$fa))),
+          (FNMSUB_S FPR32:$fj, FPR32:$fk, FPR32:$fa)>;
+
+// fnmsub.s: -fj * fk + fa (the nsz flag on the FMA)
+def : Pat<(fma_nsz (fneg FPR32:$fj), FPR32:$fk, FPR32:$fa),
           (FNMSUB_S FPR32:$fj, FPR32:$fk, FPR32:$fa)>;
 } // Predicates = [HasBasicF]
diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td
index 50d7e9920ea99e42c74a35a7a18759bb807b1168..9b2c7176d1aded3e7cd22b162c770f8d24ed389c 100644
--- a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td
@@ -17,123 +17,111 @@
 let Predicates = [HasBasicD] in {
 
 // Arithmetic Operation Instructions
-def FADD_D : FP_ALU_3R<0b00000001000000010, "fadd.d", FPR64>;
-def FSUB_D : FP_ALU_3R<0b00000001000000110, "fsub.d", FPR64>;
-def FMUL_D : FP_ALU_3R<0b00000001000001010, "fmul.d", FPR64>;
-def FDIV_D : FP_ALU_3R<0b00000001000001110, "fdiv.d", FPR64>;
-def FMADD_D : FP_ALU_4R<0b000010000010, "fmadd.d", FPR64>;
-def FMSUB_D : FP_ALU_4R<0b000010000110, "fmsub.d", FPR64>;
-def FNMADD_D : FP_ALU_4R<0b000010001010, "fnmadd.d", FPR64>;
-def FNMSUB_D : FP_ALU_4R<0b000010001110, "fnmsub.d", FPR64>;
-def FMAX_D : FP_ALU_3R<0b00000001000010010, "fmax.d", FPR64>;
-def FMIN_D : FP_ALU_3R<0b00000001000010110, "fmin.d", FPR64>;
-def FMAXA_D : FP_ALU_3R<0b00000001000011010, "fmaxa.d", FPR64>;
-def FMINA_D : FP_ALU_3R<0b00000001000011110, "fmina.d", FPR64>;
-def FABS_D : FP_ALU_2R<0b0000000100010100000010, "fabs.d", FPR64>;
-def FNEG_D : FP_ALU_2R<0b0000000100010100000110, "fneg.d", FPR64>;
-def FSQRT_D : FP_ALU_2R<0b0000000100010100010010, "fsqrt.d", FPR64>;
-def FRECIP_D : FP_ALU_2R<0b0000000100010100010110, "frecip.d", FPR64>;
-def FRSQRT_D : FP_ALU_2R<0b0000000100010100011010, "frsqrt.d", FPR64>;
-def FSCALEB_D : FP_ALU_3R<0b00000001000100010, "fscaleb.d", FPR64>;
-def FLOGB_D : FP_ALU_2R<0b0000000100010100001010, "flogb.d", FPR64>;
-def FCOPYSIGN_D : FP_ALU_3R<0b00000001000100110, "fcopysign.d", FPR64>;
-def FCLASS_D : FP_ALU_2R<0b0000000100010100001110, "fclass.d", FPR64>;
+def FADD_D : FP_ALU_3R<0b00000001000000010, FPR64>;
+def FSUB_D : FP_ALU_3R<0b00000001000000110, FPR64>;
+def FMUL_D : FP_ALU_3R<0b00000001000001010, FPR64>;
+def FDIV_D : FP_ALU_3R<0b00000001000001110, FPR64>;
+def FMADD_D : FP_ALU_4R<0b000010000010, FPR64>;
+def FMSUB_D : FP_ALU_4R<0b000010000110, FPR64>;
+def FNMADD_D : FP_ALU_4R<0b000010001010, FPR64>;
+def FNMSUB_D : FP_ALU_4R<0b000010001110, FPR64>;
+def FMAX_D : FP_ALU_3R<0b00000001000010010, FPR64>;
+def FMIN_D : FP_ALU_3R<0b00000001000010110, FPR64>;
+def FMAXA_D : FP_ALU_3R<0b00000001000011010, FPR64>;
+def FMINA_D : FP_ALU_3R<0b00000001000011110, FPR64>;
+def FABS_D : FP_ALU_2R<0b0000000100010100000010, FPR64>;
+def FNEG_D : FP_ALU_2R<0b0000000100010100000110, FPR64>;
+def FSQRT_D : FP_ALU_2R<0b0000000100010100010010, FPR64>;
+def FRECIP_D : FP_ALU_2R<0b0000000100010100010110, FPR64>;
+def FRSQRT_D : FP_ALU_2R<0b0000000100010100011010, FPR64>;
+def FSCALEB_D : FP_ALU_3R<0b00000001000100010, FPR64>;
+def FLOGB_D : FP_ALU_2R<0b0000000100010100001010, FPR64>;
+def FCOPYSIGN_D : FP_ALU_3R<0b00000001000100110, FPR64>;
+def FCLASS_D : FP_ALU_2R<0b0000000100010100001110, FPR64>;
 
 // Comparison Instructions
-def FCMP_CAF_D : FP_CMP;
-def FCMP_CUN_D : FP_CMP;
-def FCMP_CEQ_D : FP_CMP;
-def FCMP_CUEQ_D : FP_CMP;
-def FCMP_CLT_D : FP_CMP;
-def FCMP_CULT_D : FP_CMP;
-def FCMP_CLE_D : FP_CMP;
-def FCMP_CULE_D : FP_CMP;
-def FCMP_CNE_D : FP_CMP;
-def FCMP_COR_D : FP_CMP;
-def FCMP_CUNE_D : FP_CMP;
-def FCMP_SAF_D : FP_CMP;
-def FCMP_SUN_D : FP_CMP;
-def FCMP_SEQ_D : FP_CMP;
-def FCMP_SUEQ_D : FP_CMP;
-def FCMP_SLT_D : FP_CMP;
-def FCMP_SULT_D : FP_CMP;
-def FCMP_SLE_D : FP_CMP;
-def FCMP_SULE_D : FP_CMP;
-def FCMP_SNE_D : FP_CMP;
-def FCMP_SOR_D : FP_CMP;
-def FCMP_SUNE_D : FP_CMP;
+def FCMP_CAF_D : FP_CMP;
+def FCMP_CUN_D : FP_CMP;
+def FCMP_CEQ_D : FP_CMP;
+def FCMP_CUEQ_D : FP_CMP;
+def FCMP_CLT_D : FP_CMP;
+def FCMP_CULT_D : FP_CMP;
+def FCMP_CLE_D : FP_CMP;
+def FCMP_CULE_D : FP_CMP;
+def FCMP_CNE_D : FP_CMP;
+def FCMP_COR_D : FP_CMP;
+def FCMP_CUNE_D : FP_CMP;
+def FCMP_SAF_D : FP_CMP;
+def FCMP_SUN_D : FP_CMP;
+def FCMP_SEQ_D : FP_CMP;
+def FCMP_SUEQ_D : FP_CMP;
+def FCMP_SLT_D : FP_CMP;
+def FCMP_SULT_D : FP_CMP;
+def FCMP_SLE_D : FP_CMP;
+def FCMP_SULE_D : FP_CMP;
+def FCMP_SNE_D : FP_CMP;
+def FCMP_SOR_D : FP_CMP;
+def FCMP_SUNE_D : FP_CMP;
 
 // Conversion Instructions
-def FFINT_S_L : FP_CONV<0b0000000100011101000110, "ffint.s.l", FPR32, FPR64>;
-def FTINT_L_S : FP_CONV<0b0000000100011011001001, "ftint.l.s", FPR64, FPR32>;
-def FTINTRM_L_S : FP_CONV<0b0000000100011010001001, "ftintrm.l.s", FPR64,
-                          FPR32>;
-def FTINTRP_L_S : FP_CONV<0b0000000100011010011001, "ftintrp.l.s", FPR64,
-                          FPR32>;
-def FTINTRZ_L_S : FP_CONV<0b0000000100011010101001, "ftintrz.l.s", FPR64,
-                          FPR32>;
-def FTINTRNE_L_S : FP_CONV<0b0000000100011010111001, "ftintrne.l.s", FPR64,
-                           FPR32>;
-def FCVT_S_D : FP_CONV<0b0000000100011001000110, "fcvt.s.d", FPR32, FPR64>;
-def FCVT_D_S : FP_CONV<0b0000000100011001001001, "fcvt.d.s", FPR64, FPR32>;
-def FFINT_D_W : FP_CONV<0b0000000100011101001000, "ffint.d.w", FPR64, FPR32>;
-def FFINT_D_L : FP_CONV<0b0000000100011101001010, "ffint.d.l", FPR64, FPR64>;
-def FTINT_W_D : FP_CONV<0b0000000100011011000010, "ftint.w.d", FPR32, FPR64>;
-def FTINT_L_D : FP_CONV<0b0000000100011011001010, "ftint.l.d", FPR64, FPR64>;
-def FTINTRM_W_D : FP_CONV<0b0000000100011010000010, "ftintrm.w.d", FPR32,
-                          FPR64>;
-def FTINTRM_L_D : FP_CONV<0b0000000100011010001010, "ftintrm.l.d", FPR64,
-                          FPR64>;
-def FTINTRP_W_D : FP_CONV<0b0000000100011010010010, "ftintrp.w.d", FPR32,
-                          FPR64>;
-def FTINTRP_L_D : FP_CONV<0b0000000100011010011010, "ftintrp.l.d", FPR64,
-                          FPR64>;
-def FTINTRZ_W_D : FP_CONV<0b0000000100011010100010, "ftintrz.w.d", FPR32,
-                          FPR64>;
-def FTINTRZ_L_D : FP_CONV<0b0000000100011010101010, "ftintrz.l.d", FPR64,
-                          FPR64>;
-def FTINTRNE_W_D : FP_CONV<0b0000000100011010110010, "ftintrne.w.d", FPR32,
-                           FPR64>;
-def FTINTRNE_L_D : FP_CONV<0b0000000100011010111010, "ftintrne.l.d", FPR64,
-                           FPR64>;
-def FRINT_D : FP_CONV<0b0000000100011110010010, "frint.d", FPR64, FPR64>;
+def FFINT_S_L : FP_CONV<0b0000000100011101000110, FPR32, FPR64>;
+def FTINT_L_S : FP_CONV<0b0000000100011011001001, FPR64, FPR32>;
+def FTINTRM_L_S : FP_CONV<0b0000000100011010001001, FPR64, FPR32>;
+def FTINTRP_L_S : FP_CONV<0b0000000100011010011001, FPR64, FPR32>;
+def FTINTRZ_L_S : FP_CONV<0b0000000100011010101001, FPR64, FPR32>;
+def FTINTRNE_L_S : FP_CONV<0b0000000100011010111001, FPR64, FPR32>;
+def FCVT_S_D : FP_CONV<0b0000000100011001000110, FPR32, FPR64>;
+def FCVT_D_S : FP_CONV<0b0000000100011001001001, FPR64, FPR32>;
+def FFINT_D_W : FP_CONV<0b0000000100011101001000, FPR64, FPR32>;
+def FFINT_D_L : FP_CONV<0b0000000100011101001010, FPR64, FPR64>;
+def FTINT_W_D : FP_CONV<0b0000000100011011000010, FPR32, FPR64>;
+def FTINT_L_D : FP_CONV<0b0000000100011011001010, FPR64, FPR64>;
+def FTINTRM_W_D : FP_CONV<0b0000000100011010000010, FPR32, FPR64>;
+def FTINTRM_L_D : FP_CONV<0b0000000100011010001010, FPR64, FPR64>;
+def FTINTRP_W_D : FP_CONV<0b0000000100011010010010, FPR32, FPR64>;
+def FTINTRP_L_D : FP_CONV<0b0000000100011010011010, FPR64, FPR64>;
+def FTINTRZ_W_D : FP_CONV<0b0000000100011010100010, FPR32, FPR64>;
+def FTINTRZ_L_D : FP_CONV<0b0000000100011010101010, FPR64, FPR64>;
+def FTINTRNE_W_D : FP_CONV<0b0000000100011010110010, FPR32, FPR64>;
+def FTINTRNE_L_D : FP_CONV<0b0000000100011010111010, FPR64, FPR64>;
+def FRINT_D : FP_CONV<0b0000000100011110010010, FPR64, FPR64>;
 
 // Move Instructions
-def FMOV_D : FP_MOV<0b0000000100010100100110, "fmov.d", FPR64, FPR64>;
-def MOVFRH2GR_S : FP_MOV<0b0000000100010100101111, "movfrh2gr.s", GPR, FPR64>;
+def FMOV_D : FP_MOV<0b0000000100010100100110, FPR64, FPR64>;
+def MOVFRH2GR_S : FP_MOV<0b0000000100010100101111, GPR, FPR64>;
 
 let isCodeGenOnly = 1 in {
-def MOVFR2GR_S_64 : FP_MOV<0b0000000100010100101101, "movfr2gr.s", GPR, FPR64>;
-def FSEL_D : FP_SEL<0b00001101000000, "fsel", FPR64>;
+def MOVFR2GR_S_64 : FP_MOV<0b0000000100010100101101, GPR, FPR64>;
+def FSEL_xD : FP_SEL<0b00001101000000, FPR64>;
 } // isCodeGenOnly = 1
 
-let Constraints = "$dst = $out" in {
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Constraints = "$dst = $out" in {
 def MOVGR2FRH_W : FPFmtMOV<0b0000000100010100101011, (outs FPR64:$out),
-                           (ins FPR64:$dst, GPR:$src), "movgr2frh.w",
+                           (ins FPR64:$dst, GPR:$src),
                            "$dst, $src">;
-} // Constraints = "$dst = $out"
+} // hasSideEffects = 0, mayLoad = 0, mayStore = 0, Constraints = "$dst = $out"
 
 // Common Memory Access Instructions
-def FLD_D : FP_LOAD_2RI12<0b0010101110, "fld.d", FPR64>;
-def FST_D : FP_STORE_2RI12<0b0010101111, "fst.d", FPR64>;
-def FLDX_D : FP_LOAD_3R<0b00111000001101000, "fldx.d", FPR64>;
-def FSTX_D : FP_STORE_3R<0b00111000001111000, "fstx.d", FPR64>;
+def FLD_D : FP_LOAD_2RI12<0b0010101110, FPR64>;
+def FST_D : FP_STORE_2RI12<0b0010101111, FPR64>;
+def FLDX_D : FP_LOAD_3R<0b00111000001101000, FPR64>;
+def FSTX_D : FP_STORE_3R<0b00111000001111000, FPR64>;
 
 // Bound Check Memory Access Instructions
-def FLDGT_D : FP_LOAD_3R<0b00111000011101001, "fldgt.d", FPR64>;
-def FLDLE_D : FP_LOAD_3R<0b00111000011101011, "fldle.d", FPR64>;
-def FSTGT_D : FP_STORE_3R<0b00111000011101101, "fstgt.d", FPR64>;
-def FSTLE_D : FP_STORE_3R<0b00111000011101111, "fstle.d", FPR64>;
+def FLDGT_D : FP_LOAD_3R<0b00111000011101001, FPR64>;
+def FLDLE_D : FP_LOAD_3R<0b00111000011101011, FPR64>;
+def FSTGT_D : FP_STORE_3R<0b00111000011101101, FPR64>;
+def FSTLE_D : FP_STORE_3R<0b00111000011101111, FPR64>;
 
 } // Predicates = [HasBasicD]
 
 // Instructions only available on LA64
 let Predicates = [HasBasicD, IsLA64] in {
-def MOVGR2FR_D : FP_MOV<0b0000000100010100101010, "movgr2fr.d", FPR64, GPR>;
-def MOVFR2GR_D : FP_MOV<0b0000000100010100101110, "movfr2gr.d", GPR, FPR64>;
+def MOVGR2FR_D : FP_MOV<0b0000000100010100101010, FPR64, GPR>;
+def MOVFR2GR_D : FP_MOV<0b0000000100010100101110, GPR, FPR64>;
 } // Predicates = [HasBasicD, IsLA64]
 
 // Instructions only available on LA32
 let Predicates = [HasBasicD, IsLA32], isCodeGenOnly = 1 in {
-def MOVGR2FR_W_64 : FP_MOV<0b0000000100010100101001, "movgr2fr.w", FPR64, GPR>;
+def MOVGR2FR_W_64 : FP_MOV<0b0000000100010100101001, FPR64, GPR>;
 } // Predicates = [HasBasicD, IsLA32], isCodeGenOnly = 1
 
 //===----------------------------------------------------------------------===//
@@ -213,20 +201,20 @@ def : PatStrictFsetccs;
 
 /// Select
 
 def : Pat<(select CFR:$cc, FPR64:$fk, FPR64:$fj),
-          (FSEL_D FPR64:$fj, FPR64:$fk, CFR:$cc)>;
+          (FSEL_xD FPR64:$fj, FPR64:$fk, CFR:$cc)>;
 
 /// Selectcc
 
-def : PatFPSelectcc;
-def : PatFPSelectcc;
-def : PatFPSelectcc;
-def : PatFPSelectcc;
-def : PatFPSelectcc;
-def : PatFPSelectcc;
-def : PatFPSelectcc;
-def : PatFPSelectcc;
-def : PatFPSelectcc;
-def : PatFPSelectcc;
+def : PatFPSelectcc;
+def : PatFPSelectcc;
+def : PatFPSelectcc;
+def : PatFPSelectcc;
+def : PatFPSelectcc;
+def : PatFPSelectcc;
+def : PatFPSelectcc;
+def : PatFPSelectcc;
+def : PatFPSelectcc;
+def : PatFPSelectcc;
 
 /// Loads
 
@@ -268,7 +256,11 @@ def : Pat<(fma_nsz (fneg FPR64:$fj), FPR64:$fk, (fneg FPR64:$fa)),
           (FNMADD_D FPR64:$fj, FPR64:$fk, FPR64:$fa)>;
 
 // fnmsub.d: -(fj * fk - fa)
-def : Pat<(fma (fneg FPR64:$fj), FPR64:$fk, FPR64:$fa),
+def : Pat<(fneg (fma FPR64:$fj, FPR64:$fk, (fneg FPR64:$fa))),
+          (FNMSUB_D FPR64:$fj, FPR64:$fk, FPR64:$fa)>;
+
+// fnmsub.d: -fj * fk + fa (the nsz flag on the FMA)
+def : Pat<(fma_nsz (fneg FPR64:$fj), FPR64:$fk, FPR64:$fa),
          (FNMSUB_D FPR64:$fj, FPR64:$fk, FPR64:$fa)>;
 } // Predicates = [HasBasicD]
diff --git a/llvm/lib/Target/LoongArch/LoongArchFloatInstrFormats.td b/llvm/lib/Target/LoongArch/LoongArchFloatInstrFormats.td
index d2ba1fdfffe4bdc22eccac31380e9f69711fa3aa..eebaee2f59682216b7f2cf600e58086aa7ea40a5 100644
--- a/llvm/lib/Target/LoongArch/LoongArchFloatInstrFormats.td
+++ b/llvm/lib/Target/LoongArch/LoongArchFloatInstrFormats.td
@@ -16,11 +16,24 @@
 //
 //===----------------------------------------------------------------------===//
 
+// Some FP instructions are defined twice, for accepting FPR32 and FPR64, but
+// with the same mnemonic. Also some are codegen-only definitions that
+// nevertheless require a "normal" mnemonic.
+//
+// In order to accommodate these needs, the instruction defs have names
+// suffixed with `_x[SD]` or `_64`, that will get trimmed before the mnemonics
+// are derived.
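The comment above describes a purely mechanical naming rule. A standalone C++ sketch of that rule (illustrative only, not part of the patch; the suffix list is the one the comment names, everything else is assumed):

#include <algorithm>
#include <cassert>
#include <cctype>
#include <string>

// Trim the disambiguating suffix, lowercase, then turn '_' separators
// into '.' -- the same transformation the TableGen helper performs.
static std::string deriveMnemonic(std::string Name) {
  for (const char *Suffix : {"_xS", "_xD", "_64"}) {
    std::string S(Suffix);
    if (Name.size() >= S.size() &&
        Name.compare(Name.size() - S.size(), S.size(), S) == 0)
      Name.erase(Name.size() - S.size());
  }
  std::transform(Name.begin(), Name.end(), Name.begin(),
                 [](unsigned char C) { return std::tolower(C); });
  std::replace(Name.begin(), Name.end(), '_', '.');
  return Name;
}

int main() {
  assert(deriveMnemonic("FSEL_xS") == "fsel");         // FPR32/FPR64 twin
  assert(deriveMnemonic("MOVFR2GR_S_64") == "movfr2gr.s"); // codegen-only def
  assert(deriveMnemonic("FTINTRZ_W_S") == "ftintrz.w.s");  // untouched name
  return 0;
}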
+class deriveFPInsnMnemonic {
+  string ret = deriveInsnMnemonic.ret;
+}
+
 // 2R-type
 //
-class FPFmt2R op, dag outs, dag ins, string opcstr, string opnstr,
               list pattern = []>
-    : LAInst {
+    : LAInst.ret, opnstr, pattern> {
   bits<5> fj;
   bits<5> fd;
 
@@ -31,9 +44,9 @@ class FPFmt2R op, dag outs, dag ins, string opcstr, string opnstr,
 
 // 3R-type
 //
-class FPFmt3R op, dag outs, dag ins, string opcstr, string opnstr,
+class FPFmt3R op, dag outs, dag ins, string opnstr,
               list pattern = []>
-    : LAInst {
+    : LAInst.ret, opnstr, pattern> {
   bits<5> fk;
   bits<5> fj;
   bits<5> fd;
 
@@ -46,9 +59,9 @@ class FPFmt3R op, dag outs, dag ins, string opcstr, string opnstr,
 
 // 4R-type
 //
-class FPFmt4R op, dag outs, dag ins, string opcstr, string opnstr,
+class FPFmt4R op, dag outs, dag ins, string opnstr,
               list pattern = []>
-    : LAInst {
+    : LAInst.ret, opnstr, pattern> {
   bits<5> fa;
   bits<5> fk;
   bits<5> fj;
 
@@ -63,9 +76,9 @@ class FPFmt4R op, dag outs, dag ins, string opcstr, string opnstr,
 
 // 2RI12-type
 //
-class FPFmt2RI12 op, dag outs, dag ins, string opcstr, string opnstr,
+class FPFmt2RI12 op, dag outs, dag ins, string opnstr,
               list pattern = []>
-    : LAInst {
+    : LAInst.ret, opnstr, pattern> {
   bits<12> imm12;
   bits<5> rj;
   bits<5> fd;
 
@@ -78,9 +91,9 @@ class FPFmt2RI12 op, dag outs, dag ins, string opcstr, string opnstr,
 
 // FmtFCMP
 //
-class FPFmtFCMP op, bits<5> cond, dag outs, dag ins, string opcstr,
-                string opnstr, list pattern = []>
-    : LAInst {
+class FPFmtFCMP op, bits<5> cond, dag outs, dag ins, string opnstr,
+                list pattern = []>
+    : LAInst.ret, opnstr, pattern> {
   bits<5> fk;
   bits<5> fj;
   bits<3> cd;
 
@@ -95,9 +108,9 @@ class FPFmtFCMP op, bits<5> cond, dag outs, dag ins, string opcstr,
 
 // FPFmtBR
 //
-class FPFmtBR opcode, dag outs, dag ins, string opcstr,
-              string opnstr, list pattern = []>
-    : LAInst {
+class FPFmtBR opcode, dag outs, dag ins, string opnstr,
+              list pattern = []>
+    : LAInst.ret, opnstr, pattern> {
   bits<21> imm21;
   bits<3> cj;
 
@@ -110,9 +123,9 @@ class FPFmtBR opcode, dag outs, dag ins, string opcstr,
 
 // FmtFSEL
 //
-class FPFmtFSEL op, dag outs, dag ins, string opcstr, string opnstr,
+class FPFmtFSEL op, dag outs, dag ins, string opnstr,
               list pattern = []>
-    : LAInst {
+    : LAInst.ret, opnstr, pattern> {
   bits<3> ca;
   bits<5> fk;
   bits<5> fj;
 
@@ -127,9 +140,9 @@ class FPFmtFSEL op, dag outs, dag ins, string opcstr, string opnstr,
 
 // FPFmtMOV
 //
-class FPFmtMOV op, dag outs, dag ins, string opcstr, string opnstr,
+class FPFmtMOV op, dag outs, dag ins, string opnstr,
              list pattern = []>
-    : LAInst {
+    : LAInst.ret, opnstr, pattern> {
   bits<5> src;
   bits<5> dst;
 
@@ -140,9 +153,9 @@ class FPFmtMOV op, dag outs, dag ins, string opcstr, string opnstr,
 
 // FPFmtMEM
 //
-class FPFmtMEM op, dag outs, dag ins, string opcstr, string opnstr,
+class FPFmtMEM op, dag outs, dag ins, string opnstr,
              list pattern = []>
-    : LAInst {
+    : LAInst.ret, opnstr, pattern> {
   bits<5> rk;
   bits<5> rj;
   bits<5> fd;
 
@@ -157,15 +170,17 @@ class FPFmtMEM op, dag outs, dag ins, string opcstr, string opnstr,
 // Instruction class templates
 //===----------------------------------------------------------------------===//
 
-class FP_ALU_2R op, string opstr, RegisterClass rc>
-  : FPFmt2R;
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
+class FP_ALU_2R op, RegisterClass rc>
+  : FPFmt2R;
 
-class FP_ALU_3R op, string opstr, RegisterClass rc>
-  : FPFmt3R;
+class FP_ALU_3R op, RegisterClass rc>
+  : FPFmt3R;
 
-class FP_ALU_4R op, string opstr, RegisterClass rc>
-  : FPFmt4R;
+class FP_ALU_4R op, RegisterClass rc>
+  : FPFmt4R;
+} // hasSideEffects = 0, mayLoad = 0, mayStore = 0
 
 class FPCMPOpc value> {
   bits<12> val = value;
@@ -175,44 +190,46 @@ class FPCMPCond value> {
   bits<5> val = value;
 }
 
-class FP_CMP
-  : FPFmtFCMP;
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
+class FP_CMP
+  : FPFmtFCMP;
 
-class FP_CONV op, string opstr, RegisterClass rcd, RegisterClass rcs>
-  : FPFmt2R;
+class FP_CONV op, RegisterClass rcd, RegisterClass rcs>
+  : FPFmt2R;
 
-class FP_MOV op, string opstr, RegisterClass rcd, RegisterClass rcs>
-  : FPFmtMOV;
+class FP_MOV op, RegisterClass rcd, RegisterClass rcs>
+  : FPFmtMOV;
 
-class FP_SEL op, string opstr, RegisterClass rc>
-  : FPFmtFSEL;
+class FP_SEL op, RegisterClass rc>
+  : FPFmtFSEL;
 
-class FP_BRANCH opcode, string opstr>
-  : FPFmtBR {
+class FP_BRANCH opcode>
+  : FPFmtBR {
   let isBranch = 1;
   let isTerminator = 1;
 }
+} // hasSideEffects = 0, mayLoad = 0, mayStore = 0
 
-let mayLoad = 1 in {
-class FP_LOAD_3R op, string opstr, RegisterClass rc>
-  : FPFmtMEM;
+let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in {
+class FP_LOAD_3R op, RegisterClass rc>
+  : FPFmtMEM;
 
-class FP_LOAD_2RI12 op, string opstr, RegisterClass rc>
-  : FPFmt2RI12;
-} // mayLoad = 1
+class FP_LOAD_2RI12 op, RegisterClass rc>
+  : FPFmt2RI12;
+} // hasSideEffects = 0, mayLoad = 1, mayStore = 0
 
-let mayStore = 1 in {
-class FP_STORE_3R op, string opstr, RegisterClass rc>
-  : FPFmtMEM;
+let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in {
+class FP_STORE_3R op, RegisterClass rc>
  : FPFmtMEM;
 
-class FP_STORE_2RI12 op, string opstr, RegisterClass rc>
-  : FPFmt2RI12;
-} // mayStore = 1
+class FP_STORE_2RI12 op, RegisterClass rc>
+  : FPFmt2RI12;
+} // hasSideEffects = 0, mayLoad = 0, mayStore = 1
 
 def FPCMP_OPC_S : FPCMPOpc<0b000011000001>;
 def FPCMP_OPC_D : FPCMPOpc<0b000011000010>;
diff --git a/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp
index 7c51e213f2d745f5948f64c1cf44cb3c3a9a6f1a..a9903cc65746b46bb196f2497ea76bad27102cda 100644
--- a/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp
@@ -176,6 +176,16 @@ void LoongArchFrameLowering::processFunctionBeforeFrameFinalized(
   }
 }
 
+// OHOS_LOCAL begin
+#ifdef ARK_GC_SUPPORT
+Triple::ArchType LoongArchFrameLowering::GetArkSupportTarget() const {
+  return Triple::loongarch64;
+}
+
+int LoongArchFrameLowering::GetFixedFpPosition() const { return -1; }
+#endif
+// OHOS_LOCAL end
+
 void LoongArchFrameLowering::emitPrologue(MachineFunction &MF,
                                           MachineBasicBlock &MBB) const {
   MachineFrameInfo &MFI = MF.getFrameInfo();
@@ -193,8 +203,14 @@ void LoongArchFrameLowering::emitPrologue(MachineFunction &MF,
   DebugLoc DL;
   // All calls are tail calls in GHC calling conv, and functions have no
   // prologue/epilogue.
+#ifndef ARK_GC_SUPPORT // OHOS_LOCAL
   if (MF.getFunction().getCallingConv() == CallingConv::GHC)
     return;
+// OHOS_LOCAL begin
+#endif
+  // asm-int GHC calls webkit functions, so we need to push regs to the stack.
+  // OHOS_LOCAL end
+
   // Determine the correct frame layout
   determineFrameLayout(MF);
 
@@ -231,12 +247,54 @@ void LoongArchFrameLowering::emitPrologue(MachineFunction &MF,
   // to the stack, not before.
   std::advance(MBBI, CSI.size());
 
+  // OHOS_LOCAL begin
+  // Frame layout without ARK_GC_SUPPORT
+  // +--------------+ <-- caller SP (CFA)
+  // |   var args   |
+  // +--------------+ <-- FP
+  // |      ra      |
+  // |      fp      |
+  // | callee saved |
+  // +--------------+
+  // |     ...      |
+  // +--------------+ <-- SP
+  //        |
+  //        v
+  //
+  // Frame layout with ARK_GC_SUPPORT
+  // +--------------+ <-- caller SP (CFA)
+  // |   var args   |
+  // +--------------+
+  // | callee saved |
+  // |      ra      |
+  // |      fp      |
+  // +--------------+ <-- FP
+  // |  frame type  |
+  // +--------------+
+  // |     ...      |
+  // +--------------+ <-- SP
+  //        |
+  //        v
+  // OHOS_LOCAL end
+
   // Iterate over list of callee-saved registers and emit .cfi_offset
   // directives.
+  // OHOS_LOCAL begin
+  int64_t FPOffset = StackSize - LoongArchFI->getVarArgsSaveSize();
+  int64_t CFIFPOffset = LoongArchFI->getVarArgsSaveSize();
+  // OHOS_LOCAL end
   for (const auto &Entry : CSI) {
     int64_t Offset = MFI.getObjectOffset(Entry.getFrameIdx());
     unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
         nullptr, RI->getDwarfRegNum(Entry.getReg(), true), Offset));
+// OHOS_LOCAL begin
+#ifdef ARK_GC_SUPPORT
+    if (Entry.getReg() == FPReg) {
+      FPOffset = StackSize - -Offset;
+      CFIFPOffset = -Offset;
+    }
+#endif
+    // OHOS_LOCAL end
     BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
         .addCFIIndex(CFIIndex)
         .setMIFlag(MachineInstr::FrameSetup);
@@ -244,14 +302,14 @@ void LoongArchFrameLowering::emitPrologue(MachineFunction &MF,
 
   // Generate new FP.
   if (hasFP(MF)) {
-    adjustReg(MBB, MBBI, DL, FPReg, SPReg,
-              StackSize - LoongArchFI->getVarArgsSaveSize(),
-              MachineInstr::FrameSetup);
-
-    // Emit ".cfi_def_cfa $fp, LoongArchFI->getVarArgsSaveSize()"
-    unsigned CFIIndex = MF.addFrameInst(
-        MCCFIInstruction::cfiDefCfa(nullptr, RI->getDwarfRegNum(FPReg, true),
-                                    LoongArchFI->getVarArgsSaveSize()));
+    // OHOS_LOCAL begin
+    LoongArchFI->setFPOffsetAdjustment(CFIFPOffset);
+    adjustReg(MBB, MBBI, DL, FPReg, SPReg, FPOffset, MachineInstr::FrameSetup);
+
+    // Emit ".cfi_def_cfa $fp, CFIFPOffset"
+    unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa(
+        nullptr, RI->getDwarfRegNum(FPReg, true), CFIFPOffset));
+    // OHOS_LOCAL end
     BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
         .addCFIIndex(CFIIndex)
         .setMIFlag(MachineInstr::FrameSetup);
@@ -327,8 +385,14 @@ void LoongArchFrameLowering::emitEpilogue(MachineFunction &MF,
   Register SPReg = LoongArch::R3;
   // All calls are tail calls in GHC calling conv, and functions have no
   // prologue/epilogue.
+#ifndef ARK_GC_SUPPORT // OHOS_LOCAL
   if (MF.getFunction().getCallingConv() == CallingConv::GHC)
     return;
+// OHOS_LOCAL begin
+#endif
+  // asm-int GHC calls webkit functions, so we need to push regs to the stack.
+  // OHOS_LOCAL end
+
   MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
   DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
@@ -345,7 +409,7 @@ void LoongArchFrameLowering::emitEpilogue(MachineFunction &MF,
   if (RI->hasStackRealignment(MF) || MFI.hasVarSizedObjects()) {
     assert(hasFP(MF) && "frame pointer should not have been eliminated");
     adjustReg(MBB, LastFrameDestroy, DL, SPReg, LoongArch::R22,
-              -StackSize + LoongArchFI->getVarArgsSaveSize(),
+              -StackSize + LoongArchFI->getFPOffsetAdjustment(), // OHOS_LOCAL
              MachineInstr::FrameDestroy);
   }
 
@@ -511,7 +575,8 @@ StackOffset LoongArchFrameLowering::getFrameIndexReference(
   } else {
     FrameReg = RI->getFrameRegister(MF);
     if (hasFP(MF))
-      Offset += StackOffset::getFixed(LoongArchFI->getVarArgsSaveSize());
+      Offset += StackOffset::getFixed(
+          LoongArchFI->getFPOffsetAdjustment()); // OHOS_LOCAL
     else
       Offset += StackOffset::getFixed(StackSize);
   }
diff --git a/llvm/lib/Target/LoongArch/LoongArchFrameLowering.h b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.h
index 414d671593d0b4e674b05c527934359845dd3e4e..5d48eeb75b0aeefa3a8a65df15673bb68ec53340 100644
--- a/llvm/lib/Target/LoongArch/LoongArchFrameLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.h
@@ -14,6 +14,11 @@
 #define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHFRAMELOWERING_H
 
 #include "llvm/CodeGen/TargetFrameLowering.h"
+// OHOS_LOCAL begin
+#ifdef ARK_GC_SUPPORT
+#include "llvm/ADT/Triple.h"
+#endif
+// OHOS_LOCAL end
 
 namespace llvm {
 class LoongArchSubtarget;
@@ -30,6 +35,12 @@ public:
   void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
   void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
+// OHOS_LOCAL begin
+#ifdef ARK_GC_SUPPORT
+  Triple::ArchType GetArkSupportTarget() const override;
+  int GetFixedFpPosition() const override;
+#endif
+  // OHOS_LOCAL end
 
   void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs,
                             RegScavenger *RS) const override;
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp
index 49684b911cc63a1e8effd54c6d90cd0e78e1de29..056be239bae5ff30e3a334373012fa8b42e94ee7 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp
@@ -15,6 +15,7 @@
 #include "MCTargetDesc/LoongArchMCTargetDesc.h"
 #include "MCTargetDesc/LoongArchMatInt.h"
 #include "llvm/Support/KnownBits.h"
+#include "llvm/Support/raw_ostream.h"
 
 using namespace llvm;
 
@@ -70,7 +71,64 @@ void LoongArchDAGToDAGISel::Select(SDNode *Node) {
     ReplaceNode(Node, CurDAG->getMachineNode(ADDIOp, DL, VT, TFI, Imm));
     return;
   }
-  // TODO: Add selection nodes needed later.
+  case ISD::BITCAST: {
+    if (VT.is128BitVector() || VT.is256BitVector()) {
+      ReplaceUses(SDValue(Node, 0), Node->getOperand(0));
+      CurDAG->RemoveDeadNode(Node);
+      return;
+    }
+    break;
+  }
+  case ISD::BUILD_VECTOR: {
+    // Select appropriate [x]vrepli.[bhwd] instructions for constant splats of
+    // 128/256-bit when LSX/LASX is enabled.
+    BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Node);
+    APInt SplatValue, SplatUndef;
+    unsigned SplatBitSize;
+    bool HasAnyUndefs;
+    unsigned Op;
+    EVT ViaVecTy;
+    bool Is128Vec = BVN->getValueType(0).is128BitVector();
+    bool Is256Vec = BVN->getValueType(0).is256BitVector();
+
+    if (!Subtarget->hasExtLSX() || (!Is128Vec && !Is256Vec))
+      break;
+    if (!BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
+                              HasAnyUndefs, 8))
+      break;
+
+    switch (SplatBitSize) {
+    default:
+      break;
+    case 8:
+      Op = Is256Vec ? LoongArch::PseudoXVREPLI_B : LoongArch::PseudoVREPLI_B;
+      ViaVecTy = Is256Vec ? MVT::v32i8 : MVT::v16i8;
+      break;
+    case 16:
+      Op = Is256Vec ? LoongArch::PseudoXVREPLI_H : LoongArch::PseudoVREPLI_H;
+      ViaVecTy = Is256Vec ? MVT::v16i16 : MVT::v8i16;
+      break;
+    case 32:
+      Op = Is256Vec ? LoongArch::PseudoXVREPLI_W : LoongArch::PseudoVREPLI_W;
+      ViaVecTy = Is256Vec ? MVT::v8i32 : MVT::v4i32;
+      break;
+    case 64:
+      Op = Is256Vec ? LoongArch::PseudoXVREPLI_D : LoongArch::PseudoVREPLI_D;
+      ViaVecTy = Is256Vec ? MVT::v4i64 : MVT::v2i64;
+      break;
+    }
+
+    SDNode *Res;
+    // If we have a signed 10 bit integer, we can splat it directly.
+    if (SplatValue.isSignedIntN(10)) {
+      SDValue Imm = CurDAG->getTargetConstant(SplatValue, DL,
+                                              ViaVecTy.getVectorElementType());
+      Res = CurDAG->getMachineNode(Op, DL, ViaVecTy, Imm);
+      ReplaceNode(Node, Res);
+      return;
+    }
+    break;
+  }
  }
 
   // Select the default instruction.
@@ -238,6 +296,96 @@ bool LoongArchDAGToDAGISel::selectZExti32(SDValue N, SDValue &Val) {
   return false;
 }
 
+bool LoongArchDAGToDAGISel::selectVSplat(SDNode *N, APInt &Imm,
+                                         unsigned MinSizeInBits) const {
+  if (!Subtarget->hasExtLSX())
+    return false;
+
+  BuildVectorSDNode *Node = dyn_cast<BuildVectorSDNode>(N);
+
+  if (!Node)
+    return false;
+
+  APInt SplatValue, SplatUndef;
+  unsigned SplatBitSize;
+  bool HasAnyUndefs;
+
+  if (!Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
+                             MinSizeInBits, /*IsBigEndian=*/false))
+    return false;
+
+  Imm = SplatValue;
+
+  return true;
+}
+
+template <unsigned ImmBitSize, bool IsSigned>
+bool LoongArchDAGToDAGISel::selectVSplatImm(SDValue N, SDValue &SplatVal) {
+  APInt ImmValue;
+  EVT EltTy = N->getValueType(0).getVectorElementType();
+
+  if (N->getOpcode() == ISD::BITCAST)
+    N = N->getOperand(0);
+
+  if (selectVSplat(N.getNode(), ImmValue, EltTy.getSizeInBits()) &&
+      ImmValue.getBitWidth() == EltTy.getSizeInBits()) {
+    if (IsSigned && ImmValue.isSignedIntN(ImmBitSize)) {
+      SplatVal = CurDAG->getTargetConstant(ImmValue.getSExtValue(), SDLoc(N),
+                                           Subtarget->getGRLenVT());
+      return true;
+    }
+    if (!IsSigned && ImmValue.isIntN(ImmBitSize)) {
+      SplatVal = CurDAG->getTargetConstant(ImmValue.getZExtValue(), SDLoc(N),
+                                           Subtarget->getGRLenVT());
+      return true;
+    }
+  }
+
+  return false;
+}
+
+bool LoongArchDAGToDAGISel::selectVSplatUimmInvPow2(SDValue N,
+                                                    SDValue &SplatImm) const {
+  APInt ImmValue;
+  EVT EltTy = N->getValueType(0).getVectorElementType();
+
+  if (N->getOpcode() == ISD::BITCAST)
+    N = N->getOperand(0);
+
+  if (selectVSplat(N.getNode(), ImmValue, EltTy.getSizeInBits()) &&
+      ImmValue.getBitWidth() == EltTy.getSizeInBits()) {
+    int32_t Log2 = (~ImmValue).exactLogBase2();
+
+    if (Log2 != -1) {
+      SplatImm = CurDAG->getTargetConstant(Log2, SDLoc(N), EltTy);
+      return true;
+    }
+  }
+
+  return false;
+}
+
+bool LoongArchDAGToDAGISel::selectVSplatUimmPow2(SDValue N,
+                                                 SDValue &SplatImm) const {
+  APInt ImmValue;
+  EVT EltTy = N->getValueType(0).getVectorElementType();
+
+  if (N->getOpcode() == ISD::BITCAST)
+    N = N->getOperand(0);
+
+  if (selectVSplat(N.getNode(), ImmValue, EltTy.getSizeInBits()) &&
+      ImmValue.getBitWidth() == EltTy.getSizeInBits()) {
+    int32_t Log2 = ImmValue.exactLogBase2();
+
+    if (Log2 != -1) {
+      SplatImm = CurDAG->getTargetConstant(Log2, SDLoc(N), EltTy);
+      return true;
+    }
+  }
+
+  return false;
+}
+
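The two selectors above reduce splat-immediate matching to APInt::exactLogBase2(): a power-of-two splat yields its shift amount directly, and an inverted power-of-two mask is recovered by complementing first. A small host-side check of that logic, assuming only LLVM's public APInt API (illustrative, not part of the patch):

#include "llvm/ADT/APInt.h"
#include <cassert>
#include <cstdint>

void checkSplatImmediates() {
  llvm::APInt Pow2(32, 8);           // splat value 8 == 1 << 3
  assert(Pow2.exactLogBase2() == 3); // selectVSplatUimmPow2 matches, imm = 3

  llvm::APInt InvPow2(32, ~uint64_t(8) & 0xFFFFFFFF); // only bit 3 cleared
  assert((~InvPow2).exactLogBase2() == 3); // selectVSplatUimmInvPow2 matches

  llvm::APInt NotPow2(32, 6);
  assert(NotPow2.exactLogBase2() == -1); // neither selector fires
}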
 // This pass converts a legalized DAG into a LoongArch-specific DAG, ready
 // for instruction scheduling.
 FunctionPass *llvm::createLoongArchISelDag(LoongArchTargetMachine &TM) {
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h
index 49843ac610da2621be3d0c214144032b11775ec2..ff930dac8b179d55e3c93d6ad8d44b375933c279 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h
@@ -55,6 +55,14 @@ public:
   bool selectSExti32(SDValue N, SDValue &Val);
   bool selectZExti32(SDValue N, SDValue &Val);
 
+  bool selectVSplat(SDNode *N, APInt &Imm, unsigned MinSizeInBits) const;
+
+  template <unsigned ImmBitSize, bool IsSigned>
+  bool selectVSplatImm(SDValue N, SDValue &SplatVal);
+
+  bool selectVSplatUimmInvPow2(SDValue N, SDValue &SplatImm) const;
+  bool selectVSplatUimmPow2(SDValue N, SDValue &SplatImm) const;
+
   // Include the pieces autogenerated from the target description.
 #include "LoongArchGenDAGISel.inc"
 };
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 78248e0a88ebe284aa75eaaac2cea576cd451e00..f628d5dcec9757f6d68bcb3966f263f352764751 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -22,9 +22,12 @@
 #include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/ISDOpcodes.h"
 #include "llvm/CodeGen/RuntimeLibcalls.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/IntrinsicsLoongArch.h"
+#include "llvm/Support/CodeGen.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/KnownBits.h"
 #include "llvm/Support/MathExtras.h"
 
@@ -44,45 +47,79 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
     : TargetLowering(TM), Subtarget(STI) {
 
   MVT GRLenVT = Subtarget.getGRLenVT();
+
   // Set up the register classes.
+
   addRegisterClass(GRLenVT, &LoongArch::GPRRegClass);
   if (Subtarget.hasBasicF())
     addRegisterClass(MVT::f32, &LoongArch::FPR32RegClass);
   if (Subtarget.hasBasicD())
     addRegisterClass(MVT::f64, &LoongArch::FPR64RegClass);
+
+  static const MVT::SimpleValueType LSXVTs[] = {
+      MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64};
+  static const MVT::SimpleValueType LASXVTs[] = {
+      MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v8f32, MVT::v4f64};
+
+  if (Subtarget.hasExtLSX())
+    for (MVT VT : LSXVTs)
+      addRegisterClass(VT, &LoongArch::LSX128RegClass);
+
+  if (Subtarget.hasExtLASX())
+    for (MVT VT : LASXVTs)
+      addRegisterClass(VT, &LoongArch::LASX256RegClass);
+
+  // Set operations for LA32 and LA64.
+
   setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, GRLenVT,
                    MVT::i1, Promote);
 
-  // TODO: add necessary setOperationAction calls later.
   setOperationAction(ISD::SHL_PARTS, GRLenVT, Custom);
   setOperationAction(ISD::SRA_PARTS, GRLenVT, Custom);
   setOperationAction(ISD::SRL_PARTS, GRLenVT, Custom);
   setOperationAction(ISD::FP_TO_SINT, GRLenVT, Custom);
   setOperationAction(ISD::ROTL, GRLenVT, Expand);
   setOperationAction(ISD::CTPOP, GRLenVT, Expand);
-  setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
-  setOperationAction(ISD::TRAP, MVT::Other, Legal);
-  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
-  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
 
   setOperationAction({ISD::GlobalAddress, ISD::BlockAddress, ISD::ConstantPool,
-                      ISD::JumpTable},
+                      ISD::JumpTable, ISD::GlobalTLSAddress},
                     GRLenVT, Custom);
 
-  setOperationAction(ISD::GlobalTLSAddress, GRLenVT, Custom);
-
-  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
-
-  setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
-  if (Subtarget.is64Bit())
-    setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom);
+  setOperationAction(ISD::EH_DWARF_CFA, GRLenVT, Custom);
 
   setOperationAction(ISD::DYNAMIC_STACKALLOC, GRLenVT, Expand);
   setOperationAction({ISD::STACKSAVE, ISD::STACKRESTORE}, MVT::Other, Expand);
   setOperationAction(ISD::VASTART, MVT::Other, Custom);
   setOperationAction({ISD::VAARG, ISD::VACOPY, ISD::VAEND}, MVT::Other, Expand);
 
+  setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
+  setOperationAction(ISD::TRAP, MVT::Other, Legal);
+
+  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
+  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
+  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
+
+  // Expand bitreverse.i16 with native-width bitrev and shift for now, before
+  // we get to know which of sll and revb.2h is faster.
+  setOperationAction(ISD::BITREVERSE, MVT::i8, Custom);
+  setOperationAction(ISD::BITREVERSE, GRLenVT, Legal);
+
+  // LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and
+  // the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16
+  // and i32 could still be byte-swapped relatively cheaply.
+  setOperationAction(ISD::BSWAP, MVT::i16, Custom);
+
+  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+  setOperationAction(ISD::BR_CC, GRLenVT, Expand);
+  setOperationAction(ISD::SELECT_CC, GRLenVT, Expand);
+  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+  setOperationAction({ISD::SMUL_LOHI, ISD::UMUL_LOHI}, GRLenVT, Expand);
+
+  setOperationAction(ISD::FP_TO_UINT, GRLenVT, Custom);
+  setOperationAction(ISD::UINT_TO_FP, GRLenVT, Expand);
+
+  // Set operations for LA64 only.
+
+  if (Subtarget.is64Bit()) {
     setOperationAction(ISD::SHL, MVT::i32, Custom);
     setOperationAction(ISD::SRA, MVT::i32, Custom);
@@ -93,48 +130,44 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::ROTL, MVT::i32, Custom);
     setOperationAction(ISD::CTTZ, MVT::i32, Custom);
     setOperationAction(ISD::CTLZ, MVT::i32, Custom);
-    setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom);
-    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i32, Custom);
-    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
+    setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
     setOperationAction(ISD::READ_REGISTER, MVT::i32, Custom);
     setOperationAction(ISD::WRITE_REGISTER, MVT::i32, Custom);
-    if (Subtarget.hasBasicF() && !Subtarget.hasBasicD())
-      setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
-    if (Subtarget.hasBasicF())
-      setOperationAction(ISD::FRINT, MVT::f32, Legal);
-    if (Subtarget.hasBasicD())
-      setOperationAction(ISD::FRINT, MVT::f64, Legal);
-  }
+    setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom);
+    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom);
+    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i32, Custom);
 
-  // LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and
-  // the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16
-  // and i32 could still be byte-swapped relatively cheaply.
-  setOperationAction(ISD::BSWAP, MVT::i16, Custom);
-  if (Subtarget.is64Bit()) {
+    setOperationAction(ISD::BITREVERSE, MVT::i32, Custom);
     setOperationAction(ISD::BSWAP, MVT::i32, Custom);
   }
 
-  // Expand bitreverse.i16 with native-width bitrev and shift for now, before
-  // we get to know which of sll and revb.2h is faster.
-  setOperationAction(ISD::BITREVERSE, MVT::i8, Custom);
-  if (Subtarget.is64Bit()) {
-    setOperationAction(ISD::BITREVERSE, MVT::i32, Custom);
-    setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
-  } else {
-    setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
-    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
+  // Set operations for LA32 only.
+
+  if (!Subtarget.is64Bit()) {
     setOperationAction(ISD::READ_REGISTER, MVT::i64, Custom);
     setOperationAction(ISD::WRITE_REGISTER, MVT::i64, Custom);
-    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
     setOperationAction(ISD::INTRINSIC_VOID, MVT::i64, Custom);
+    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
+    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
+
+    // Set libcalls.
+    setLibcallName(RTLIB::MUL_I128, nullptr);
+    // The MULO libcall is not part of libgcc, only compiler-rt.
+    setLibcallName(RTLIB::MULO_I64, nullptr);
   }
 
+  // The MULO libcall is not part of libgcc, only compiler-rt.
+  setLibcallName(RTLIB::MULO_I128, nullptr);
+
   static const ISD::CondCode FPCCToExpand[] = {
       ISD::SETOGT, ISD::SETOGE, ISD::SETUGT, ISD::SETUGE,
       ISD::SETGE,  ISD::SETNE,  ISD::SETGT};
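FPCCToExpand lists the FP comparisons that get rewritten in terms of the ones fcmp provides directly; SETOGT, for instance, needs no instruction of its own because it is SETOLT with the operands swapped. A host-side model of that rewrite (illustrative only; fcmp_clt mimics the ordered-compare NaN behaviour of fcmp.clt.s):

#include <cassert>
#include <cmath>

// Ordered less-than: false if either input is NaN (as < already is for floats).
static bool fcmp_clt(float A, float B) { return A < B; }
// Ordered greater-than, expanded as the operand-swapped ordered less-than.
static bool fcmp_ogt(float A, float B) { return fcmp_clt(B, A); }

int main() {
  assert(fcmp_ogt(2.0f, 1.0f));
  assert(!fcmp_ogt(1.0f, 2.0f));
  assert(!fcmp_ogt(NAN, 1.0f)); // ordered: any NaN operand yields false
  return 0;
}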
+  // Set operations for 'F' feature.
+
+  if (Subtarget.hasBasicF()) {
     setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
+
     setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
     setOperationAction(ISD::BR_CC, MVT::f32, Expand);
     setOperationAction(ISD::FMA, MVT::f32, Legal);
@@ -147,14 +180,30 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
     setOperationAction(ISD::FPOW, MVT::f32, Expand);
     setOperationAction(ISD::FREM, MVT::f32, Expand);
+
+    if (Subtarget.is64Bit())
+      setOperationAction(ISD::FRINT, MVT::f32, Legal);
+
+    if (!Subtarget.hasBasicD()) {
+      setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
+      if (Subtarget.is64Bit()) {
+        setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
+        setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
+      }
+    }
   }
+
+  // Set operations for 'D' feature.
+
+  if (Subtarget.hasBasicD()) {
+    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
+    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
     setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
+
     setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
     setOperationAction(ISD::BR_CC, MVT::f64, Expand);
     setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Legal);
     setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Legal);
-    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
     setOperationAction(ISD::FMA, MVT::f64, Legal);
     setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal);
     setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal);
@@ -163,99 +212,1247 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
     setOperationAction(ISD::FPOW, MVT::f64, Expand);
     setOperationAction(ISD::FREM, MVT::f64, Expand);
-    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+
+    if (Subtarget.is64Bit())
+      setOperationAction(ISD::FRINT, MVT::f64, Legal);
+  }
+
+  // Set operations for 'LSX' feature.
+
+  if (Subtarget.hasExtLSX()) {
+    for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
+      // Expand all truncating stores and extending loads.
+      for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
+        setTruncStoreAction(VT, InnerVT, Expand);
+        setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
+        setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
+        setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
+      }
+      // By default everything must be expanded. Then we will selectively turn
+      // on ones that can be effectively codegen'd.
+      for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
+        setOperationAction(Op, VT, Expand);
+    }
+
+    for (MVT VT : LSXVTs) {
+      setOperationAction({ISD::LOAD, ISD::STORE}, VT, Legal);
+      setOperationAction(ISD::BITCAST, VT, Legal);
+      setOperationAction(ISD::UNDEF, VT, Legal);
+
+      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
+      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal);
+      setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+
+      setOperationAction(ISD::SETCC, VT, Legal);
+      setOperationAction(ISD::VSELECT, VT, Legal);
+      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
+    }
+    for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
+      setOperationAction({ISD::ADD, ISD::SUB}, VT, Legal);
+      setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, VT,
+                         Legal);
+      setOperationAction({ISD::MUL, ISD::SDIV, ISD::SREM, ISD::UDIV, ISD::UREM},
+                         VT, Legal);
+      setOperationAction({ISD::AND, ISD::OR, ISD::XOR}, VT, Legal);
+      setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL}, VT, Legal);
+      setOperationAction({ISD::CTPOP, ISD::CTLZ}, VT, Legal);
+      setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Legal);
+      setCondCodeAction(
+          {ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT,
+          Expand);
+    }
+    for (MVT VT : {MVT::v4i32, MVT::v2i64}) {
+      setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP}, VT, Legal);
+      setOperationAction({ISD::FP_TO_SINT, ISD::FP_TO_UINT}, VT, Legal);
+    }
+    for (MVT VT : {MVT::v4f32, MVT::v2f64}) {
+      setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal);
+      setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal);
+      setOperationAction(ISD::FMA, VT, Legal);
+      setOperationAction(ISD::FSQRT, VT, Legal);
+      setOperationAction(ISD::FNEG, VT, Legal);
+      setCondCodeAction({ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT,
+                         ISD::SETUGE, ISD::SETUGT},
+                        VT, Expand);
+    }
+  }
+
+  // Set operations for 'LASX' feature.
+
+  if (Subtarget.hasExtLASX()) {
+    for (MVT VT : LASXVTs) {
+      setOperationAction({ISD::LOAD, ISD::STORE}, VT, Legal);
+      setOperationAction(ISD::BITCAST, VT, Legal);
+      setOperationAction(ISD::UNDEF, VT, Legal);
+
+      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
+      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
+      setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+
+      setOperationAction(ISD::SETCC, VT, Legal);
+      setOperationAction(ISD::VSELECT, VT, Legal);
+      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
+    }
+    for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) {
+      setOperationAction({ISD::ADD, ISD::SUB}, VT, Legal);
+      setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, VT,
+                         Legal);
+      setOperationAction({ISD::MUL, ISD::SDIV, ISD::SREM, ISD::UDIV, ISD::UREM},
+                         VT, Legal);
+      setOperationAction({ISD::AND, ISD::OR, ISD::XOR}, VT, Legal);
+      setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL}, VT, Legal);
+      setOperationAction({ISD::CTPOP, ISD::CTLZ}, VT, Legal);
+      setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Legal);
+      setCondCodeAction(
+          {ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT,
+          Expand);
+    }
+    for (MVT VT : {MVT::v8i32, MVT::v4i32, MVT::v4i64}) {
+      setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP}, VT, Legal);
+      setOperationAction({ISD::FP_TO_SINT, ISD::FP_TO_UINT}, VT, Legal);
+    }
+    for (MVT VT : {MVT::v8f32, MVT::v4f64}) {
+      setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal);
+      setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal);
+      setOperationAction(ISD::FMA, VT, Legal);
+      setOperationAction(ISD::FSQRT, VT, Legal);
+      setOperationAction(ISD::FNEG, VT, Legal);
+      setCondCodeAction({ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT,
+                         ISD::SETUGE, ISD::SETUGT},
+                        VT, Expand);
+    }
+  }
+
+  // Set DAG combine for LA32 and LA64.
+
+  setTargetDAGCombine(ISD::AND);
+  setTargetDAGCombine(ISD::OR);
+  setTargetDAGCombine(ISD::SRL);
+
+  // Set DAG combine for 'LSX' feature.
+
+  if (Subtarget.hasExtLSX())
+    setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
+
+  // Compute derived properties from the register classes.
+  computeRegisterProperties(STI.getRegisterInfo());
+
+  setStackPointerRegisterToSaveRestore(LoongArch::R3);
+
+  setBooleanContents(ZeroOrOneBooleanContent);
+  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
+
+  setMaxAtomicSizeInBitsSupported(Subtarget.getGRLen());
+
+  setMinCmpXchgSizeInBits(32);
+
+  // Function alignments.
+  const Align FunctionAlignment(4);
+  setMinFunctionAlignment(FunctionAlignment);
+}
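The constructor's bitreverse comment above boils down to: reversing a narrow value with the native-width bitrev leaves the result in the high bits, and a shift finishes the job. A host-side model of that expansion (illustrative only; the backend emits a bitrev.w/bitrev.d plus a shift, not this loop):

#include <cassert>
#include <cstdint>

static uint32_t bitrev32(uint32_t X) {
  uint32_t R = 0;
  for (int I = 0; I < 32; ++I)
    R = (R << 1) | ((X >> I) & 1); // bit I of X lands at bit 31-I of R
  return R;
}

// i8/i16 bitreverse via the native 32-bit reverse plus a right shift.
static uint8_t bitrev8(uint8_t X) { return uint8_t(bitrev32(X) >> 24); }
static uint16_t bitrev16(uint16_t X) { return uint16_t(bitrev32(X) >> 16); }

int main() {
  assert(bitrev8(0x01) == 0x80);
  assert(bitrev8(0xB0) == 0x0D); // 1011'0000 -> 0000'1101
  assert(bitrev16(0x0001) == 0x8000);
  return 0;
}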
+bool LoongArchTargetLowering::isOffsetFoldingLegal(
+    const GlobalAddressSDNode *GA) const {
+  // In order to maximise the opportunity for common subexpression elimination,
+  // keep a separate ADD node for the global address offset instead of folding
+  // it in the global address node. Later peephole optimisations may choose to
+  // fold it back in when profitable.
+  return false;
+}
+
+SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
+                                                SelectionDAG &DAG) const {
+  switch (Op.getOpcode()) {
+  case ISD::EH_DWARF_CFA:
+    return lowerEH_DWARF_CFA(Op, DAG);
+  case ISD::GlobalAddress:
+    return lowerGlobalAddress(Op, DAG);
+  case ISD::GlobalTLSAddress:
+    return lowerGlobalTLSAddress(Op, DAG);
+  case ISD::INTRINSIC_WO_CHAIN:
+    return lowerINTRINSIC_WO_CHAIN(Op, DAG);
+  case ISD::INTRINSIC_W_CHAIN:
+    return lowerINTRINSIC_W_CHAIN(Op, DAG);
+  case ISD::INTRINSIC_VOID:
+    return lowerINTRINSIC_VOID(Op, DAG);
+  case ISD::BlockAddress:
+    return lowerBlockAddress(Op, DAG);
+  case ISD::JumpTable:
+    return lowerJumpTable(Op, DAG);
+  case ISD::SHL_PARTS:
+    return lowerShiftLeftParts(Op, DAG);
+  case ISD::SRA_PARTS:
+    return lowerShiftRightParts(Op, DAG, true);
+  case ISD::SRL_PARTS:
+    return lowerShiftRightParts(Op, DAG, false);
+  case ISD::ConstantPool:
+    return lowerConstantPool(Op, DAG);
+  case ISD::FP_TO_SINT:
+    return lowerFP_TO_SINT(Op, DAG);
+  case ISD::BITCAST:
+    return lowerBITCAST(Op, DAG);
+  case ISD::UINT_TO_FP:
+    return lowerUINT_TO_FP(Op, DAG);
+  case ISD::SINT_TO_FP:
+    return lowerSINT_TO_FP(Op, DAG);
+  case ISD::VASTART:
+    return lowerVASTART(Op, DAG);
+  case ISD::FRAMEADDR:
+    return lowerFRAMEADDR(Op, DAG);
+  case ISD::RETURNADDR:
+    return lowerRETURNADDR(Op, DAG);
+  case ISD::WRITE_REGISTER:
+    return lowerWRITE_REGISTER(Op, DAG);
+  case ISD::INSERT_VECTOR_ELT:
+    return lowerINSERT_VECTOR_ELT(Op, DAG);
+  case ISD::EXTRACT_VECTOR_ELT:
+    return lowerEXTRACT_VECTOR_ELT(Op, DAG);
+  case ISD::BUILD_VECTOR:
+    return lowerBUILD_VECTOR(Op, DAG);
+  case ISD::VECTOR_SHUFFLE:
+    return lowerVECTOR_SHUFFLE(Op, DAG);
+  }
+  return SDValue();
+}
+
+/// Determine whether a range fits a regular pattern of values.
+/// This function accounts for the possibility of jumping over the End iterator.
+template <typename ValType>
+static bool
+fitsRegularPattern(typename SmallVectorImpl<ValType>::const_iterator Begin,
+                   unsigned CheckStride,
+                   typename SmallVectorImpl<ValType>::const_iterator End,
+                   ValType ExpectedIndex, unsigned ExpectedIndexStride) {
+  auto &I = Begin;
+
+  while (I != End) {
+    if (*I != -1 && *I != ExpectedIndex)
+      return false;
+    ExpectedIndex += ExpectedIndexStride;
+
+    // Incrementing past End is undefined behaviour so we must increment one
+    // step at a time and check for End at each step.
+    for (unsigned n = 0; n < CheckStride && I != End; ++n, ++I)
+      ; // Empty loop body.
+  }
+  return true;
+}
+
+/// Lower VECTOR_SHUFFLE into VREPLVEI (if possible).
+///
+/// VREPLVEI performs vector broadcast based on an element specified by an
+/// integer immediate, with its mask being similar to:
+///   <x, x, x, ...>
+/// where x is any valid index.
+///
+/// When undef's appear in the mask they are treated as if they were whatever
+/// value is necessary in order to fit the above form.
+static SDValue lowerVECTOR_SHUFFLE_VREPLVEI(const SDLoc &DL, ArrayRef<int> Mask,
+                                            MVT VT, SDValue V1, SDValue V2,
+                                            SelectionDAG &DAG) {
+  int SplatIndex = -1;
+  for (const auto &M : Mask) {
+    if (M != -1) {
+      SplatIndex = M;
+      break;
+    }
+  }
+
+  if (SplatIndex == -1)
+    return DAG.getUNDEF(VT);
+
+  assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
+  if (fitsRegularPattern<int>(Mask.begin(), 1, Mask.end(), SplatIndex, 0)) {
+    APInt Imm(64, SplatIndex);
+    return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
+                       DAG.getConstant(Imm, DL, MVT::i64));
+  }
+
+  return SDValue();
+}
+
+/// Lower VECTOR_SHUFFLE into VSHUF4I (if possible).
+///
+/// VSHUF4I splits the vector into blocks of four elements, then shuffles these
+/// elements according to a <4 x i2> constant (encoded as an integer immediate).
+///
+/// It is therefore possible to lower into VSHUF4I when the mask takes the form:
+///   <a, b, c, d, a+4, b+4, c+4, d+4, ...>
+/// When undef's appear they are treated as if they were whatever value is
+/// necessary in order to fit the above forms.
+///
+/// For example:
+///   %2 = shufflevector <8 x i16> %0, <8 x i16> undef,
+///        <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
+/// is lowered to:
+///   (VSHUF4I_H $v0, $v1, 27)
+/// where the 27 comes from:
+///   3 + (2 << 2) + (1 << 4) + (0 << 6)
+static SDValue lowerVECTOR_SHUFFLE_VSHUF4I(const SDLoc &DL, ArrayRef<int> Mask,
+                                           MVT VT, SDValue V1, SDValue V2,
+                                           SelectionDAG &DAG) {
+
+  // When the size is less than 4, lower cost instructions may be used.
+  if (Mask.size() < 4)
+    return SDValue();
+
+  int SubMask[4] = {-1, -1, -1, -1};
+  for (unsigned i = 0; i < 4; ++i) {
+    for (unsigned j = i; j < Mask.size(); j += 4) {
+      int Idx = Mask[j];
+
+      // Convert from vector index to 4-element subvector index
+      // If an index refers to an element outside of the subvector then give up
+      if (Idx != -1) {
+        Idx -= 4 * (j / 4);
+        if (Idx < 0 || Idx >= 4)
+          return SDValue();
+      }
+
+      // If the mask has an undef, replace it with the current index.
+      // Note that it might still be undef if the current index is also undef
+      if (SubMask[i] == -1)
+        SubMask[i] = Idx;
+      // Check that non-undef values are the same as in the mask. If they
+      // aren't then give up
+      else if (Idx != -1 && Idx != SubMask[i])
+        return SDValue();
+    }
+  }
+
+  // Calculate the immediate. Replace any remaining undefs with zero
+  APInt Imm(64, 0);
+  for (int i = 3; i >= 0; --i) {
+    int Idx = SubMask[i];
+
+    if (Idx == -1)
+      Idx = 0;
+
+    Imm <<= 2;
+    Imm |= Idx & 0x3;
+  }
+
+  return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT, V1,
+                     DAG.getConstant(Imm, DL, MVT::i64));
+}
+
+/// Lower VECTOR_SHUFFLE into VPACKEV (if possible).
+///
+/// VPACKEV interleaves the even elements from each vector.
+///
+/// It is possible to lower into VPACKEV when the mask consists of two of the
+/// following forms interleaved:
+///   <0, 2, 4, ...>
+///   <n, n+2, n+4, ...>
+/// where n is the number of elements in the vector.
+/// For example:
+///   <0, 0, 2, 2, 4, 4, ...>
+///   <0, n, 2, n+2, 4, n+4, ...>
+///
+/// When undef's appear in the mask they are treated as if they were whatever
+/// value is necessary in order to fit the above forms.
+static SDValue lowerVECTOR_SHUFFLE_VPACKEV(const SDLoc &DL, ArrayRef<int> Mask,
+                                           MVT VT, SDValue V1, SDValue V2,
+                                           SelectionDAG &DAG) {
+
+  const auto &Begin = Mask.begin();
+  const auto &End = Mask.end();
+  SDValue OriV1 = V1, OriV2 = V2;
+
+  if (fitsRegularPattern<int>(Begin, 2, End, 0, 2))
+    V1 = OriV1;
+  else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 2))
+    V1 = OriV2;
+  else
+    return SDValue();
+
+  if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 2))
+    V2 = OriV1;
+  else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 2))
+    V2 = OriV2;
+  else
+    return SDValue();
+
+  return DAG.getNode(LoongArchISD::VPACKEV, DL, VT, V2, V1);
+}
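The 27 in the VSHUF4I example earlier falls directly out of the two-bits-per-element encoding loop in lowerVECTOR_SHUFFLE_VSHUF4I. A standalone recomputation (illustrative only; -1 marks an undef lane, which the lowering defaults to index 0):

#include <array>
#include <cassert>
#include <cstdint>

static uint8_t encodeVSHUF4I(const std::array<int, 4> &SubMask) {
  uint8_t Imm = 0;
  for (int I = 3; I >= 0; --I) {
    int Idx = SubMask[I] == -1 ? 0 : SubMask[I];
    Imm = uint8_t((Imm << 2) | (Idx & 0x3)); // element I -> bits [2I+1:2I]
  }
  return Imm;
}

int main() {
  // The doc comment's example: submask <3, 2, 1, 0> encodes as
  // 3 + (2 << 2) + (1 << 4) + (0 << 6) == 27.
  assert(encodeVSHUF4I({3, 2, 1, 0}) == 27);
  return 0;
}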
+/// Lower VECTOR_SHUFFLE into VPACKOD (if possible).
+///
+/// VPACKOD interleaves the odd elements from each vector.
+///
+/// It is possible to lower into VPACKOD when the mask consists of two of the
+/// following forms interleaved:
+///   <1, 3, 5, ...>
+///   <n+1, n+3, n+5, ...>
+/// where n is the number of elements in the vector.
+/// For example:
+///   <1, 1, 3, 3, 5, 5, ...>
+///   <1, n+1, 3, n+3, 5, n+5, ...>
+///
+/// When undef's appear in the mask they are treated as if they were whatever
+/// value is necessary in order to fit the above forms.
+static SDValue lowerVECTOR_SHUFFLE_VPACKOD(const SDLoc &DL, ArrayRef<int> Mask,
+                                           MVT VT, SDValue V1, SDValue V2,
+                                           SelectionDAG &DAG) {
+
+  const auto &Begin = Mask.begin();
+  const auto &End = Mask.end();
+  SDValue OriV1 = V1, OriV2 = V2;
+
+  if (fitsRegularPattern<int>(Begin, 2, End, 1, 2))
+    V1 = OriV1;
+  else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + 1, 2))
+    V1 = OriV2;
+  else
+    return SDValue();
+
+  if (fitsRegularPattern<int>(Begin + 1, 2, End, 1, 2))
+    V2 = OriV1;
+  else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + 1, 2))
+    V2 = OriV2;
+  else
+    return SDValue();
+
+  return DAG.getNode(LoongArchISD::VPACKOD, DL, VT, V2, V1);
+}
+
+/// Lower VECTOR_SHUFFLE into VILVH (if possible).
+///
+/// VILVH interleaves consecutive elements from the left (highest-indexed) half
+/// of each vector.
+///
+/// It is possible to lower into VILVH when the mask consists of two of the
+/// following forms interleaved:
+///   <x, x+1, x+2, ...>
+///   <n+x, n+x+1, n+x+2, ...>
+/// where n is the number of elements in the vector and x is half n.
+/// For example:
+///   <x, x, x+1, x+1, x+2, x+2, ...>
+///   <x, n+x, x+1, n+x+1, x+2, n+x+2, ...>
+///
+/// When undef's appear in the mask they are treated as if they were whatever
+/// value is necessary in order to fit the above forms.
+static SDValue lowerVECTOR_SHUFFLE_VILVH(const SDLoc &DL, ArrayRef<int> Mask,
+                                         MVT VT, SDValue V1, SDValue V2,
+                                         SelectionDAG &DAG) {
+
+  const auto &Begin = Mask.begin();
+  const auto &End = Mask.end();
+  unsigned HalfSize = Mask.size() / 2;
+  SDValue OriV1 = V1, OriV2 = V2;
+
+  if (fitsRegularPattern<int>(Begin, 2, End, HalfSize, 1))
+    V1 = OriV1;
+  else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + HalfSize, 1))
+    V1 = OriV2;
+  else
+    return SDValue();
+
+  if (fitsRegularPattern<int>(Begin + 1, 2, End, HalfSize, 1))
+    V2 = OriV1;
+  else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + HalfSize,
+                                   1))
+    V2 = OriV2;
+  else
+    return SDValue();
+
+  return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
+}
+
+/// Lower VECTOR_SHUFFLE into VILVL (if possible).
+///
+/// VILVL interleaves consecutive elements from the right (lowest-indexed) half
+/// of each vector.
+///
+/// It is possible to lower into VILVL when the mask consists of two of the
+/// following forms interleaved:
+///   <0, 1, 2, ...>
+///   <n, n+1, n+2, ...>
+/// where n is the number of elements in the vector.
+/// For example:
+///   <0, 0, 1, 1, 2, 2, ...>
+///   <0, n, 1, n+1, 2, n+2, ...>
+///
+/// When undef's appear in the mask they are treated as if they were whatever
+/// value is necessary in order to fit the above forms.
+static SDValue lowerVECTOR_SHUFFLE_VILVL(const SDLoc &DL, ArrayRef<int> Mask,
+                                         MVT VT, SDValue V1, SDValue V2,
+                                         SelectionDAG &DAG) {
+
+  const auto &Begin = Mask.begin();
+  const auto &End = Mask.end();
+  SDValue OriV1 = V1, OriV2 = V2;
+
+  if (fitsRegularPattern<int>(Begin, 2, End, 0, 1))
+    V1 = OriV1;
+  else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 1))
+    V1 = OriV2;
+  else
+    return SDValue();
+
+  if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 1))
+    V2 = OriV1;
+  else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 1))
+    V2 = OriV2;
+  else
+    return SDValue();
+
+  return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
+}
+
+/// Lower VECTOR_SHUFFLE into VPICKEV (if possible).
+///
+/// VPICKEV copies the even elements of each vector into the result vector.
+///
+/// It is possible to lower into VPICKEV when the mask consists of two of the
+/// following forms concatenated:
+///   <0, 2, 4, ...>
+///   <n, n+2, n+4, ...>
+/// where n is the number of elements in the vector.
+/// For example:
+///   <0, 2, 4, ..., 0, 2, 4, ...>
+///   <0, 2, 4, ..., n, n+2, n+4, ...>
+///
+/// When undef's appear in the mask they are treated as if they were whatever
+/// value is necessary in order to fit the above forms.
+static SDValue lowerVECTOR_SHUFFLE_VPICKEV(const SDLoc &DL, ArrayRef<int> Mask,
+                                           MVT VT, SDValue V1, SDValue V2,
+                                           SelectionDAG &DAG) {
+
+  const auto &Begin = Mask.begin();
+  const auto &Mid = Mask.begin() + Mask.size() / 2;
+  const auto &End = Mask.end();
+  SDValue OriV1 = V1, OriV2 = V2;
+
+  if (fitsRegularPattern<int>(Begin, 1, Mid, 0, 2))
+    V1 = OriV1;
+  else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size(), 2))
+    V1 = OriV2;
+  else
+    return SDValue();
+
+  if (fitsRegularPattern<int>(Mid, 1, End, 0, 2))
+    V2 = OriV1;
+  else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size(), 2))
+    V2 = OriV2;
+  else
+    return SDValue();
+
+  return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
+}
+
+/// Lower VECTOR_SHUFFLE into VPICKOD (if possible).
+///
+/// VPICKOD copies the odd elements of each vector into the result vector.
+///
+/// It is possible to lower into VPICKOD when the mask consists of two of the
+/// following forms concatenated:
+///   <1, 3, 5, ...>
+///   <n+1, n+3, n+5, ...>
+/// where n is the number of elements in the vector.
+/// For example:
+///   <1, 3, 5, ..., 1, 3, 5, ...>
+///   <1, 3, 5, ..., n+1, n+3, n+5, ...>
+///
+/// When undef's appear in the mask they are treated as if they were whatever
+/// value is necessary in order to fit the above forms.
+static SDValue lowerVECTOR_SHUFFLE_VPICKOD(const SDLoc &DL, ArrayRef<int> Mask,
+                                           MVT VT, SDValue V1, SDValue V2,
+                                           SelectionDAG &DAG) {
+
+  const auto &Begin = Mask.begin();
+  const auto &Mid = Mask.begin() + Mask.size() / 2;
+  const auto &End = Mask.end();
+  SDValue OriV1 = V1, OriV2 = V2;
+
+  if (fitsRegularPattern<int>(Begin, 1, Mid, 1, 2))
+    V1 = OriV1;
+  else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size() + 1, 2))
+    V1 = OriV2;
+  else
+    return SDValue();
+
+  if (fitsRegularPattern<int>(Mid, 1, End, 1, 2))
+    V2 = OriV1;
+  else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size() + 1, 2))
+    V2 = OriV2;
+  else
+    return SDValue();
+
+  return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
+}
+
+/// Lower VECTOR_SHUFFLE into VSHUF.
+///
+/// This mostly consists of converting the shuffle mask into a BUILD_VECTOR and
+/// adding it as an operand to the resulting VSHUF.
+static SDValue lowerVECTOR_SHUFFLE_VSHUF(const SDLoc &DL, ArrayRef<int> Mask,
+                                         MVT VT, SDValue V1, SDValue V2,
+                                         SelectionDAG &DAG) {
+
+  SmallVector<SDValue, 16> Ops;
+  for (auto M : Mask)
+    Ops.push_back(DAG.getConstant(M, DL, MVT::i64));
+
+  EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
+  SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, Ops);
+
+  // VECTOR_SHUFFLE concatenates the vectors in a vectorwise fashion.
+  //   <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11>
+  // VSHF concatenates the vectors in a bitwise fashion:
+  //   <0b00, 0b01> + <0b10, 0b11> ->
+  //   0b0100       + 0b1110       -> 0b01001110
+  //                                  <0b10, 0b11, 0b00, 0b01>
+  // We must therefore swap the operands to get the correct result.
+  return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
+}
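All of the lowerings above reduce to fitsRegularPattern<int> checks on the shuffle mask. A standalone model of that helper (illustrative; -1 plays the role of an undef lane, exactly as in the doc comments):

#include <cassert>
#include <vector>

static bool fitsRegularPattern(const std::vector<int> &Mask, unsigned Start,
                               unsigned CheckStride, int ExpectedIndex,
                               unsigned ExpectedIndexStride) {
  for (unsigned I = Start; I < Mask.size(); I += CheckStride) {
    if (Mask[I] != -1 && Mask[I] != ExpectedIndex)
      return false;
    ExpectedIndex += ExpectedIndexStride;
  }
  return true;
}

int main() {
  // A v4i32 vpackev mask <0, 4, 2, 6>: even lanes of V1 interleaved with
  // even lanes of V2 (n = 4), checked like lowerVECTOR_SHUFFLE_VPACKEV does.
  std::vector<int> Mask = {0, 4, 2, 6};
  assert(fitsRegularPattern(Mask, /*Start=*/0, 2, 0, 2)); // <0, 2, ...>
  assert(fitsRegularPattern(Mask, /*Start=*/1, 2, 4, 2)); // <n, n+2, ...>
  // An undef (-1) lane may stand in for whatever index the pattern expects.
  std::vector<int> MaskU = {0, -1, 2, 6};
  assert(fitsRegularPattern(MaskU, 0, 2, 0, 2));
  return 0;
}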
+static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
+                                  SDValue V1, SDValue V2, SelectionDAG &DAG) {
+  assert((VT.SimpleTy == MVT::v16i8 || VT.SimpleTy == MVT::v8i16 ||
+          VT.SimpleTy == MVT::v4i32 || VT.SimpleTy == MVT::v2i64 ||
+          VT.SimpleTy == MVT::v4f32 || VT.SimpleTy == MVT::v2f64) &&
+         "Vector type is unsupported for lsx!");
+  assert(V1.getSimpleValueType() == V2.getSimpleValueType() &&
+         "Two operands have different types!");
+  assert(VT.getVectorNumElements() == Mask.size() &&
+         "Unexpected mask size for shuffle!");
+  assert(Mask.size() % 2 == 0 && "Expected even mask size.");
+
+  SDValue Result;
+  // TODO: Add more comparison patterns.
+  if (V2.isUndef()) {
+    if ((Result = lowerVECTOR_SHUFFLE_VREPLVEI(DL, Mask, VT, V1, V2, DAG)))
+      return Result;
+    if ((Result = lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG)))
+      return Result;
+
+    // TODO: This comment may be enabled in the future to better match the
+    // pattern for instruction selection.
+    /* V2 = V1; */
+  }
+
+  // It is recommended not to change the pattern comparison order for better
+  // performance.
+  if ((Result = lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG)))
+    return Result;
+  if ((Result = lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG)))
+    return Result;
+  if ((Result = lowerVECTOR_SHUFFLE_VILVH(DL, Mask, VT, V1, V2, DAG)))
+    return Result;
+  if ((Result = lowerVECTOR_SHUFFLE_VILVL(DL, Mask, VT, V1, V2, DAG)))
+    return Result;
+  if ((Result = lowerVECTOR_SHUFFLE_VPICKEV(DL, Mask, VT, V1, V2, DAG)))
+    return Result;
+  if ((Result = lowerVECTOR_SHUFFLE_VPICKOD(DL, Mask, VT, V1, V2, DAG)))
+    return Result;
+  if ((Result = lowerVECTOR_SHUFFLE_VSHUF(DL, Mask, VT, V1, V2, DAG)))
+    return Result;
+
+  return SDValue();
+}
+
+/// Lower VECTOR_SHUFFLE into XVREPLVEI (if possible).
+///
+/// It is a XVREPLVEI when the mask is:
+///   <x, x, x, ..., x+n, x+n, x+n, ..., x+n>
+/// where the number of x is equal to n and n is half the length of vector.
+///
+/// When undef's appear in the mask they are treated as if they were whatever
+/// value is necessary in order to fit the above form.
+static SDValue lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL,
+                                             ArrayRef<int> Mask, MVT VT,
+                                             SDValue V1, SDValue V2,
+                                             SelectionDAG &DAG) {
+  int SplatIndex = -1;
+  for (const auto &M : Mask) {
+    if (M != -1) {
+      SplatIndex = M;
+      break;
+    }
  }
-  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+  if (SplatIndex == -1)
+    return DAG.getUNDEF(VT);
+
+  const auto &Begin = Mask.begin();
+  const auto &End = Mask.end();
+  unsigned HalfSize = Mask.size() / 2;
+
+  assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
+  if (fitsRegularPattern<int>(Begin, 1, End - HalfSize, SplatIndex, 0) &&
+      fitsRegularPattern<int>(Begin + HalfSize, 1, End, SplatIndex + HalfSize,
+                              0)) {
+    APInt Imm(64, SplatIndex);
+    return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
+                       DAG.getConstant(Imm, DL, MVT::i64));
+  }
+
+  return SDValue();
+}
+
+/// Lower VECTOR_SHUFFLE into XVSHUF4I (if possible).
+static SDValue lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL,
+                                            ArrayRef<int> Mask, MVT VT,
+                                            SDValue V1, SDValue V2,
+                                            SelectionDAG &DAG) {
+  // When the size is less than or equal to 4, lower cost instructions may be
+  // used.
+  if (Mask.size() <= 4)
+    return SDValue();
+  return lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG);
+}
+
+/// Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
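+///
+/// XVPACKEV packs even elements within each 128-bit lane. Because the
+/// even/odd pack mask patterns never cross lanes, the 128-bit check can be
+/// reused unchanged, so this routine simply forwards to
+/// lowerVECTOR_SHUFFLE_VPACKEV (the same holds for XVPACKOD below).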
+static SDValue lowerVECTOR_SHUFFLE_XVPACKEV(const SDLoc &DL,
+                                            ArrayRef<int> Mask, MVT VT,
+                                            SDValue V1, SDValue V2,
+                                            SelectionDAG &DAG) {
+  return lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG);
+}
+
+/// Lower VECTOR_SHUFFLE into XVPACKOD (if possible).
+static SDValue lowerVECTOR_SHUFFLE_XVPACKOD(const SDLoc &DL,
+                                            ArrayRef<int> Mask, MVT VT,
+                                            SDValue V1, SDValue V2,
+                                            SelectionDAG &DAG) {
+  return lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG);
+}
+
+/// Lower VECTOR_SHUFFLE into XVILVH (if possible).
+static SDValue lowerVECTOR_SHUFFLE_XVILVH(const SDLoc &DL, ArrayRef<int> Mask,
+                                          MVT VT, SDValue V1, SDValue V2,
+                                          SelectionDAG &DAG) {
+
+  const auto &Begin = Mask.begin();
+  const auto &End = Mask.end();
+  unsigned HalfSize = Mask.size() / 2;
+  unsigned LeftSize = HalfSize / 2;
+  SDValue OriV1 = V1, OriV2 = V2;
+
+  if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, HalfSize - LeftSize,
+                              1) &&
+      fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize + LeftSize,
+                              1))
+    V1 = OriV1;
+  else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize,
+                                   Mask.size() + HalfSize - LeftSize, 1) &&
+           fitsRegularPattern<int>(Begin + HalfSize, 2, End,
+                                   Mask.size() + HalfSize + LeftSize, 1))
+    V1 = OriV2;
+  else
+    return SDValue();
+
+  if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize,
+                              HalfSize - LeftSize, 1) &&
+      fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
+                              HalfSize + LeftSize, 1))
+    V2 = OriV1;
+  else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize,
+                                   Mask.size() + HalfSize - LeftSize, 1) &&
+           fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
+                                   Mask.size() + HalfSize + LeftSize, 1))
+    V2 = OriV2;
+  else
+    return SDValue();
+
+  return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
+}
+
+/// Lower VECTOR_SHUFFLE into XVILVL (if possible).
+static SDValue lowerVECTOR_SHUFFLE_XVILVL(const SDLoc &DL, ArrayRef<int> Mask,
+                                          MVT VT, SDValue V1, SDValue V2,
+                                          SelectionDAG &DAG) {
+
+  const auto &Begin = Mask.begin();
+  const auto &End = Mask.end();
+  unsigned HalfSize = Mask.size() / 2;
+  SDValue OriV1 = V1, OriV2 = V2;
+
+  if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, 0, 1) &&
+      fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize, 1))
+    V1 = OriV1;
+  else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, Mask.size(),
+                                   1) &&
+           fitsRegularPattern<int>(Begin + HalfSize, 2, End,
+                                   Mask.size() + HalfSize, 1))
+    V1 = OriV2;
+  else
+    return SDValue();
+
+  if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, 0, 1) &&
+      fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize, 1))
+    V2 = OriV1;
+  else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, Mask.size(),
+                                   1) &&
+           fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
+                                   Mask.size() + HalfSize, 1))
+    V2 = OriV2;
+  else
+    return SDValue();
+
+  return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
+}
+
+/// Lower VECTOR_SHUFFLE into XVPICKEV (if possible).
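+///
+/// Unlike the pack and interleave forms above, the 256-bit pick patterns
+/// cannot reuse the 128-bit check directly: XVPICKEV picks within each
+/// 128-bit lane, so the mask is validated in quarter-sized pieces
+/// (Begin/LeftMid/Mid/RightMid below), one pair of quarters per lane.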
+static SDValue lowerVECTOR_SHUFFLE_XVPICKEV(const SDLoc &DL,
+                                            ArrayRef<int> Mask, MVT VT,
+                                            SDValue V1, SDValue V2,
+                                            SelectionDAG &DAG) {
+
+  const auto &Begin = Mask.begin();
+  const auto &LeftMid = Mask.begin() + Mask.size() / 4;
+  const auto &Mid = Mask.begin() + Mask.size() / 2;
+  const auto &RightMid = Mask.end() - Mask.size() / 4;
+  const auto &End = Mask.end();
+  unsigned HalfSize = Mask.size() / 2;
+  SDValue OriV1 = V1, OriV2 = V2;
+
+  if (fitsRegularPattern<int>(Begin, 1, LeftMid, 0, 2) &&
+      fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize, 2))
+    V1 = OriV1;
+  else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size(), 2) &&
+           fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize,
+                                   2))
+    V1 = OriV2;
+  else
+    return SDValue();
+
+  if (fitsRegularPattern<int>(LeftMid, 1, Mid, 0, 2) &&
+      fitsRegularPattern<int>(RightMid, 1, End, HalfSize, 2))
+    V2 = OriV1;
+  else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size(), 2) &&
+           fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize,
+                                   2))
+    V2 = OriV2;
+  else
+    return SDValue();
+
+  return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
+}
+
+/// Lower VECTOR_SHUFFLE into XVPICKOD (if possible).
+static SDValue lowerVECTOR_SHUFFLE_XVPICKOD(const SDLoc &DL,
+                                            ArrayRef<int> Mask, MVT VT,
+                                            SDValue V1, SDValue V2,
+                                            SelectionDAG &DAG) {
+
+  const auto &Begin = Mask.begin();
+  const auto &LeftMid = Mask.begin() + Mask.size() / 4;
+  const auto &Mid = Mask.begin() + Mask.size() / 2;
+  const auto &RightMid = Mask.end() - Mask.size() / 4;
+  const auto &End = Mask.end();
+  unsigned HalfSize = Mask.size() / 2;
+  SDValue OriV1 = V1, OriV2 = V2;
+
+  if (fitsRegularPattern<int>(Begin, 1, LeftMid, 1, 2) &&
+      fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize + 1, 2))
+    V1 = OriV1;
+  else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size() + 1, 2) &&
+           fitsRegularPattern<int>(Mid, 1, RightMid,
+                                   Mask.size() + HalfSize + 1, 2))
+    V1 = OriV2;
+  else
+    return SDValue();
+
+  if (fitsRegularPattern<int>(LeftMid, 1, Mid, 1, 2) &&
+      fitsRegularPattern<int>(RightMid, 1, End, HalfSize + 1, 2))
+    V2 = OriV1;
+  else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size() + 1, 2) &&
+           fitsRegularPattern<int>(RightMid, 1, End,
+                                   Mask.size() + HalfSize + 1, 2))
+    V2 = OriV2;
+  else
+    return SDValue();
+
+  return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
+}
+
+/// Lower VECTOR_SHUFFLE into XVSHUF (if possible).
+static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef<int> Mask,
+                                          MVT VT, SDValue V1, SDValue V2,
+                                          SelectionDAG &DAG) {
+
+  int MaskSize = Mask.size();
+  int HalfSize = Mask.size() / 2;
+  const auto &Begin = Mask.begin();
+  const auto &Mid = Mask.begin() + HalfSize;
+  const auto &End = Mask.end();
+
+  // VECTOR_SHUFFLE concatenates the vectors:
+  //   <0, 1, 2, 3, 4, 5, 6, 7> + <8, 9, 10, 11, 12, 13, 14, 15>
+  // shuffling ->
+  //   <0, 1, 2, 3, 8, 9, 10, 11> <4, 5, 6, 7, 12, 13, 14, 15>
+  //
+  // XVSHUF concatenates the vectors:
+  //   <a0, a1, a2, a3, b0, b1, b2, b3> + <a4, a5, a6, a7, b4, b5, b6, b7>
+  // shuffling ->
+  //   <a0, a1, a2, a3, a4, a5, a6, a7> + <b0, b1, b2, b3, b4, b5, b6, b7>
+  SmallVector<SDValue, 8> MaskAlloc;
+  for (auto it = Begin; it < Mid; it++) {
+    if (*it < 0) // UNDEF
+      MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
+    else if ((*it >= 0 && *it < HalfSize) ||
+             (*it >= MaskSize && *it <= MaskSize + HalfSize)) {
+      int M = *it < HalfSize ? *it : *it - HalfSize;
+      MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
+    } else
+      return SDValue();
+  }
+  assert((int)MaskAlloc.size() == HalfSize && "xvshuf convert failed!");
+
+  for (auto it = Mid; it < End; it++) {
+    if (*it < 0) // UNDEF
+      MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
+    else if ((*it >= HalfSize && *it < MaskSize) ||
+             (*it >= MaskSize + HalfSize && *it < MaskSize * 2)) {
+      int M = *it < MaskSize ? *it - HalfSize : *it - MaskSize;
+      MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
+    } else
+      return SDValue();
+  }
+  assert((int)MaskAlloc.size() == MaskSize && "xvshuf convert failed!");
+
+  EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
+  SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, MaskAlloc);
+  return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
+}
+
+/// Shuffle vectors by lane to generate more optimized instructions.
+/// 256-bit shuffles are always considered as 2-lane 128-bit shuffles.
+///
+/// Therefore, except for the following four cases, other cases are regarded
+/// as cross-lane shuffles, where optimization is relatively limited.
+///
+/// - Shuffle high, low lanes of two inputs vector
+///   <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 3, 6>
+/// - Shuffle low, high lanes of two inputs vector
+///   <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 0, 5>
+/// - Shuffle low, low lanes of two inputs vector
+///   <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 3, 6>
+/// - Shuffle high, high lanes of two inputs vector
+///   <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 0, 5>
+///
+/// The first case is the closest to LoongArch instructions and the other
+/// cases need to be converted to it for processing.
+///
+/// This function may modify V1, V2 and Mask.
+static void canonicalizeShuffleVectorByLane(const SDLoc &DL,
+                                            MutableArrayRef<int> Mask, MVT VT,
+                                            SDValue &V1, SDValue &V2,
+                                            SelectionDAG &DAG) {
+
+  enum HalfMaskType { HighLaneTy, LowLaneTy, None };
+
+  int MaskSize = Mask.size();
+  int HalfSize = Mask.size() / 2;
+
+  HalfMaskType preMask = None, postMask = None;
+
+  if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
+        return M < 0 || (M >= 0 && M < HalfSize) ||
+               (M >= MaskSize && M < MaskSize + HalfSize);
+      }))
+    preMask = HighLaneTy;
+  else if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
+             return M < 0 || (M >= HalfSize && M < MaskSize) ||
+                    (M >= MaskSize + HalfSize && M < MaskSize * 2);
+           }))
+    preMask = LowLaneTy;
+
+  if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
+        return M < 0 || (M >= 0 && M < HalfSize) ||
+               (M >= MaskSize && M < MaskSize + HalfSize);
+      }))
+    postMask = HighLaneTy;
+  else if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
+             return M < 0 || (M >= HalfSize && M < MaskSize) ||
+                    (M >= MaskSize + HalfSize && M < MaskSize * 2);
+           }))
+    postMask = LowLaneTy;
+
+  // The pre-half of the mask is high lane type, and the post-half of the mask
+  // is low lane type, which is closest to the LoongArch instructions.
+  //
+  // Note: In the LoongArch architecture, the high lane of the mask corresponds
+  // to the lower 128 bits of the vector register, and the low lane of the mask
+  // corresponds to the higher 128 bits of the vector register.
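+  // For reference: with XVPERMI (xvpermi.d) on v4i64 the immediate selects
+  // 64-bit elements two bits at a time, so 0b01001110 (= {2, 3, 0, 1}) swaps
+  // the two 128-bit halves, 0b11101110 (= {2, 3, 2, 3}) repeats the high
+  // half, and 0b01000100 (= {0, 1, 0, 1}) repeats the low half, matching the
+  // three conversion cases below.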
+  if (preMask == HighLaneTy && postMask == LowLaneTy) {
+    return;
+  }
+  if (preMask == LowLaneTy && postMask == HighLaneTy) {
+    V1 = DAG.getBitcast(MVT::v4i64, V1);
+    V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
+                     DAG.getConstant(0b01001110, DL, MVT::i64));
+    V1 = DAG.getBitcast(VT, V1);
+
+    if (!V2.isUndef()) {
+      V2 = DAG.getBitcast(MVT::v4i64, V2);
+      V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
+                       DAG.getConstant(0b01001110, DL, MVT::i64));
+      V2 = DAG.getBitcast(VT, V2);
+    }
+
+    for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
+      *it = *it < 0 ? *it : *it - HalfSize;
+    }
+    for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
+      *it = *it < 0 ? *it : *it + HalfSize;
+    }
+  } else if (preMask == LowLaneTy && postMask == LowLaneTy) {
+    V1 = DAG.getBitcast(MVT::v4i64, V1);
+    V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
+                     DAG.getConstant(0b11101110, DL, MVT::i64));
+    V1 = DAG.getBitcast(VT, V1);
+
+    if (!V2.isUndef()) {
+      V2 = DAG.getBitcast(MVT::v4i64, V2);
+      V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
+                       DAG.getConstant(0b11101110, DL, MVT::i64));
+      V2 = DAG.getBitcast(VT, V2);
+    }
+
+    for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
+      *it = *it < 0 ? *it : *it - HalfSize;
+    }
+  } else if (preMask == HighLaneTy && postMask == HighLaneTy) {
+    V1 = DAG.getBitcast(MVT::v4i64, V1);
+    V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
+                     DAG.getConstant(0b01000100, DL, MVT::i64));
+    V1 = DAG.getBitcast(VT, V1);
+
+    if (!V2.isUndef()) {
+      V2 = DAG.getBitcast(MVT::v4i64, V2);
+      V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
+                       DAG.getConstant(0b01000100, DL, MVT::i64));
+      V2 = DAG.getBitcast(VT, V2);
+    }
+
+    for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
+      *it = *it < 0 ? *it : *it + HalfSize;
+    }
+  } else { // cross-lane
+    return;
+  }
+}
+
+/// Dispatching routine to lower various 256-bit LoongArch vector shuffles.
+///
+/// This routine breaks down the specific type of 256-bit shuffle and
+/// dispatches to the lowering routines accordingly.
+static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
+                                  SDValue V1, SDValue V2, SelectionDAG &DAG) {
+  assert((VT.SimpleTy == MVT::v32i8 || VT.SimpleTy == MVT::v16i16 ||
+          VT.SimpleTy == MVT::v8i32 || VT.SimpleTy == MVT::v4i64 ||
+          VT.SimpleTy == MVT::v8f32 || VT.SimpleTy == MVT::v4f64) &&
+         "Vector type is unsupported for lasx!");
+  assert(V1.getSimpleValueType() == V2.getSimpleValueType() &&
+         "Two operands have different types!");
+  assert(VT.getVectorNumElements() == Mask.size() &&
+         "Unexpected mask size for shuffle!");
+  assert(Mask.size() % 2 == 0 && "Expected even mask size.");
+  assert(Mask.size() >= 4 && "Mask size is less than 4.");
+
+  // Canonicalize non-cross-lane shuffle vectors.
+  SmallVector<int> NewMask(Mask.begin(), Mask.end());
+  canonicalizeShuffleVectorByLane(DL, NewMask, VT, V1, V2, DAG);
+
+  SDValue Result;
+  // TODO: Add more comparison patterns.
+  if (V2.isUndef()) {
+    if ((Result = lowerVECTOR_SHUFFLE_XVREPLVEI(DL, NewMask, VT, V1, V2, DAG)))
+      return Result;
+    if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, NewMask, VT, V1, V2, DAG)))
+      return Result;
+
+    // TODO: This comment may be enabled in the future to better match the
+    // pattern for instruction selection.
+    /* V2 = V1; */
+  }
+
+  // It is recommended not to change the pattern comparison order for better
+  // performance.
+  if ((Result = lowerVECTOR_SHUFFLE_XVPACKEV(DL, NewMask, VT, V1, V2, DAG)))
+    return Result;
+  if ((Result = lowerVECTOR_SHUFFLE_XVPACKOD(DL, NewMask, VT, V1, V2, DAG)))
+    return Result;
+  if ((Result = lowerVECTOR_SHUFFLE_XVILVH(DL, NewMask, VT, V1, V2, DAG)))
+    return Result;
+  if ((Result = lowerVECTOR_SHUFFLE_XVILVL(DL, NewMask, VT, V1, V2, DAG)))
+    return Result;
+  if ((Result = lowerVECTOR_SHUFFLE_XVPICKEV(DL, NewMask, VT, V1, V2, DAG)))
+    return Result;
+  if ((Result = lowerVECTOR_SHUFFLE_XVPICKOD(DL, NewMask, VT, V1, V2, DAG)))
+    return Result;
+  if ((Result = lowerVECTOR_SHUFFLE_XVSHUF(DL, NewMask, VT, V1, V2, DAG)))
+    return Result;
+
+  return SDValue();
+}
+
+SDValue LoongArchTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
+                                                     SelectionDAG &DAG) const {
+  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+  ArrayRef<int> OrigMask = SVOp->getMask();
+  SDValue V1 = Op.getOperand(0);
+  SDValue V2 = Op.getOperand(1);
+  MVT VT = Op.getSimpleValueType();
+  int NumElements = VT.getVectorNumElements();
+  SDLoc DL(Op);
+
+  bool V1IsUndef = V1.isUndef();
+  bool V2IsUndef = V2.isUndef();
+  if (V1IsUndef && V2IsUndef)
+    return DAG.getUNDEF(VT);
+
+  // When we create a shuffle node we put the UNDEF node as the second
+  // operand, but in some cases the first operand may be transformed to UNDEF.
+  // In this case we should just commute the node.
+  if (V1IsUndef)
+    return DAG.getCommutedVectorShuffle(*SVOp);
+
+  // Check for non-undef masks pointing at an undef vector and make the masks
+  // undef as well. This makes it easier to match the shuffle based solely on
+  // the mask.
+  if (V2IsUndef &&
+      any_of(OrigMask, [NumElements](int M) { return M >= NumElements; })) {
+    SmallVector<int, 8> NewMask(OrigMask.begin(), OrigMask.end());
+    for (int &M : NewMask)
+      if (M >= NumElements)
+        M = -1;
+    return DAG.getVectorShuffle(VT, DL, V1, V2, NewMask);
+  }
+
+  // Check for illegal shuffle mask element index values.
+  int MaskUpperLimit = OrigMask.size() * (V2IsUndef ? 1 : 2);
+  (void)MaskUpperLimit;
+  assert(llvm::all_of(OrigMask,
+                      [&](int M) { return -1 <= M && M < MaskUpperLimit; }) &&
+         "Out of bounds shuffle index");
+
+  // For each vector width, delegate to a specialized lowering routine.
+  if (VT.is128BitVector())
+    return lower128BitShuffle(DL, OrigMask, VT, V1, V2, DAG);
+
+  if (VT.is256BitVector())
+    return lower256BitShuffle(DL, OrigMask, VT, V1, V2, DAG);
+
+  return SDValue();
+}
+
+static bool isConstantOrUndef(const SDValue Op) {
+  if (Op->isUndef())
+    return true;
+  if (isa<ConstantSDNode>(Op))
+    return true;
+  if (isa<ConstantFPSDNode>(Op))
+    return true;
+  return false;
+}
+
+static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op) {
+  for (unsigned i = 0; i < Op->getNumOperands(); ++i)
+    if (isConstantOrUndef(Op->getOperand(i)))
+      return true;
+  return false;
+}
-  setOperationAction(ISD::BR_CC, GRLenVT, Expand);
-  setOperationAction(ISD::SELECT_CC, GRLenVT, Expand);
-  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
-  setOperationAction({ISD::SMUL_LOHI, ISD::UMUL_LOHI}, GRLenVT, Expand);
-  if (!Subtarget.is64Bit())
-    setLibcallName(RTLIB::MUL_I128, nullptr);
+SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
+                                                   SelectionDAG &DAG) const {
+  BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op);
+  EVT ResTy = Op->getValueType(0);
+  SDLoc DL(Op);
+  APInt SplatValue, SplatUndef;
+  unsigned SplatBitSize;
+  bool HasAnyUndefs;
+  bool Is128Vec = ResTy.is128BitVector();
+  bool Is256Vec = ResTy.is256BitVector();
+
+  if ((!Subtarget.hasExtLSX() || !Is128Vec) &&
+      (!Subtarget.hasExtLASX() || !Is256Vec))
+    return SDValue();
-  setOperationAction(ISD::FP_TO_UINT, GRLenVT, Custom);
-  setOperationAction(ISD::UINT_TO_FP, GRLenVT, Expand);
-  if ((Subtarget.is64Bit() && Subtarget.hasBasicF() &&
-       !Subtarget.hasBasicD())) {
-    setOperationAction(ISD::SINT_TO_FP, GRLenVT, Custom);
-    setOperationAction(ISD::UINT_TO_FP, GRLenVT, Custom);
-  }
+  if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
+                            /*MinSplatBits=*/8) &&
+      SplatBitSize <= 64) {
+    // We can only cope with 8, 16, 32, or 64-bit elements.
+    if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 &&
+        SplatBitSize != 64)
+      return SDValue();
-  // Compute derived properties from the register classes.
-  computeRegisterProperties(STI.getRegisterInfo());
+    EVT ViaVecTy;
-  setStackPointerRegisterToSaveRestore(LoongArch::R3);
+    switch (SplatBitSize) {
+    default:
+      return SDValue();
+    case 8:
+      ViaVecTy = Is128Vec ? MVT::v16i8 : MVT::v32i8;
+      break;
+    case 16:
+      ViaVecTy = Is128Vec ? MVT::v8i16 : MVT::v16i16;
+      break;
+    case 32:
+      ViaVecTy = Is128Vec ? MVT::v4i32 : MVT::v8i32;
+      break;
+    case 64:
+      ViaVecTy = Is128Vec ? MVT::v2i64 : MVT::v4i64;
+      break;
+    }
-  setBooleanContents(ZeroOrOneBooleanContent);
+    // SelectionDAG::getConstant will promote SplatValue appropriately.
+    SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy);
-  setMaxAtomicSizeInBitsSupported(Subtarget.getGRLen());
+    // Bitcast to the type we originally wanted.
+    if (ViaVecTy != ResTy)
+      Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result);
-  setMinCmpXchgSizeInBits(32);
+    return Result;
+  }
-  // Function alignments.
-  const Align FunctionAlignment(4);
-  setMinFunctionAlignment(FunctionAlignment);
+  if (DAG.isSplatValue(Op, /*AllowUndefs=*/false))
+    return Op;
-  setTargetDAGCombine(ISD::AND);
-  setTargetDAGCombine(ISD::OR);
-  setTargetDAGCombine(ISD::SRL);
+  if (!isConstantOrUndefBUILD_VECTOR(Node)) {
+    // Use INSERT_VECTOR_ELT operations rather than expand to stores.
+    // The resulting code is the same length as the expansion, but it doesn't
+    // use memory operations.
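+    // Illustrative sketch: a <4 x i32> build_vector {a, b, c, d} becomes
+    //   t0 = UNDEF
+    //   t1 = insert_vector_elt t0, a, 0
+    //   ...
+    //   t4 = insert_vector_elt t3, d, 3
+    // which instruction selection can then turn into element-insert
+    // instructions such as vinsgr2vr.{b/h/w/d} instead of going through the
+    // stack.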
+ EVT ResTy = Node->getValueType(0); + + assert(ResTy.isVector()); + + unsigned NumElts = ResTy.getVectorNumElements(); + SDValue Vector = DAG.getUNDEF(ResTy); + for (unsigned i = 0; i < NumElts; ++i) { + Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector, + Node->getOperand(i), + DAG.getConstant(i, DL, Subtarget.getGRLenVT())); + } + return Vector; + } + + return SDValue(); } -bool LoongArchTargetLowering::isOffsetFoldingLegal( - const GlobalAddressSDNode *GA) const { - // In order to maximise the opportunity for common subexpression elimination, - // keep a separate ADD node for the global address offset instead of folding - // it in the global address node. Later peephole optimisations may choose to - // fold it back in when profitable. - return false; +SDValue +LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op, + SelectionDAG &DAG) const { + EVT VecTy = Op->getOperand(0)->getValueType(0); + SDValue Idx = Op->getOperand(1); + EVT EltTy = VecTy.getVectorElementType(); + unsigned NumElts = VecTy.getVectorNumElements(); + + if (isa(Idx) && + (EltTy == MVT::i32 || EltTy == MVT::i64 || EltTy == MVT::f32 || + EltTy == MVT::f64 || + cast(Idx)->getZExtValue() < NumElts / 2)) + return Op; + + return SDValue(); } -SDValue LoongArchTargetLowering::LowerOperation(SDValue Op, +SDValue +LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { - switch (Op.getOpcode()) { - case ISD::EH_DWARF_CFA: - return lowerEH_DWARF_CFA(Op, DAG); - case ISD::GlobalAddress: - return lowerGlobalAddress(Op, DAG); - case ISD::GlobalTLSAddress: - return lowerGlobalTLSAddress(Op, DAG); - case ISD::INTRINSIC_WO_CHAIN: - return lowerINTRINSIC_WO_CHAIN(Op, DAG); - case ISD::INTRINSIC_W_CHAIN: - return lowerINTRINSIC_W_CHAIN(Op, DAG); - case ISD::INTRINSIC_VOID: - return lowerINTRINSIC_VOID(Op, DAG); - case ISD::BlockAddress: - return lowerBlockAddress(Op, DAG); - case ISD::JumpTable: - return lowerJumpTable(Op, DAG); - case ISD::SHL_PARTS: - return lowerShiftLeftParts(Op, DAG); - case ISD::SRA_PARTS: - return lowerShiftRightParts(Op, DAG, true); - case ISD::SRL_PARTS: - return lowerShiftRightParts(Op, DAG, false); - case ISD::ConstantPool: - return lowerConstantPool(Op, DAG); - case ISD::FP_TO_SINT: - return lowerFP_TO_SINT(Op, DAG); - case ISD::BITCAST: - return lowerBITCAST(Op, DAG); - case ISD::UINT_TO_FP: - return lowerUINT_TO_FP(Op, DAG); - case ISD::SINT_TO_FP: - return lowerSINT_TO_FP(Op, DAG); - case ISD::VASTART: - return lowerVASTART(Op, DAG); - case ISD::FRAMEADDR: - return lowerFRAMEADDR(Op, DAG); - case ISD::RETURNADDR: - return lowerRETURNADDR(Op, DAG); - case ISD::WRITE_REGISTER: - return lowerWRITE_REGISTER(Op, DAG); - } + if (isa(Op->getOperand(2))) + return Op; return SDValue(); } @@ -465,53 +1662,99 @@ static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty, template SDValue LoongArchTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG, + CodeModel::Model M, bool IsLocal) const { SDLoc DL(N); EVT Ty = getPointerTy(DAG.getDataLayout()); SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0); - // TODO: Check CodeModel. - if (IsLocal) - // This generates the pattern (PseudoLA_PCREL sym), which expands to - // (addi.w/d (pcalau12i %pc_hi20(sym)) %pc_lo12(sym)). - return SDValue(DAG.getMachineNode(LoongArch::PseudoLA_PCREL, DL, Ty, Addr), - 0); - // This generates the pattern (PseudoLA_GOT sym), which expands to (ld.w/d - // (pcalau12i %got_pc_hi20(sym)) %got_pc_lo12(sym)). 
- return SDValue(DAG.getMachineNode(LoongArch::PseudoLA_GOT, DL, Ty, Addr), 0); + switch (M) { + default: + report_fatal_error("Unsupported code model"); + + case CodeModel::Large: { + assert(Subtarget.is64Bit() && "Large code model requires LA64"); + + // This is not actually used, but is necessary for successfully matching + // the PseudoLA_*_LARGE nodes. + SDValue Tmp = DAG.getConstant(0, DL, Ty); + if (IsLocal) + // This generates the pattern (PseudoLA_PCREL_LARGE tmp sym), that + // eventually becomes the desired 5-insn code sequence. + return SDValue(DAG.getMachineNode(LoongArch::PseudoLA_PCREL_LARGE, DL, Ty, + Tmp, Addr), + 0); + + // This generates the pattern (PseudoLA_GOT_LARGE tmp sym), that eventually + // becomes the desired 5-insn code sequence. + return SDValue( + DAG.getMachineNode(LoongArch::PseudoLA_GOT_LARGE, DL, Ty, Tmp, Addr), + 0); + } + + case CodeModel::Small: + case CodeModel::Medium: + if (IsLocal) + // This generates the pattern (PseudoLA_PCREL sym), which expands to + // (addi.w/d (pcalau12i %pc_hi20(sym)) %pc_lo12(sym)). + return SDValue( + DAG.getMachineNode(LoongArch::PseudoLA_PCREL, DL, Ty, Addr), 0); + + // This generates the pattern (PseudoLA_GOT sym), which expands to (ld.w/d + // (pcalau12i %got_pc_hi20(sym)) %got_pc_lo12(sym)). + return SDValue(DAG.getMachineNode(LoongArch::PseudoLA_GOT, DL, Ty, Addr), + 0); + } } SDValue LoongArchTargetLowering::lowerBlockAddress(SDValue Op, SelectionDAG &DAG) const { - return getAddr(cast(Op), DAG); + return getAddr(cast(Op), DAG, + DAG.getTarget().getCodeModel()); } SDValue LoongArchTargetLowering::lowerJumpTable(SDValue Op, SelectionDAG &DAG) const { - return getAddr(cast(Op), DAG); + return getAddr(cast(Op), DAG, + DAG.getTarget().getCodeModel()); } SDValue LoongArchTargetLowering::lowerConstantPool(SDValue Op, SelectionDAG &DAG) const { - return getAddr(cast(Op), DAG); + return getAddr(cast(Op), DAG, + DAG.getTarget().getCodeModel()); } SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { GlobalAddressSDNode *N = cast(Op); assert(N->getOffset() == 0 && "unexpected offset in global node"); - return getAddr(N, DAG, N->getGlobal()->isDSOLocal()); + auto CM = DAG.getTarget().getCodeModel(); + const GlobalValue *GV = N->getGlobal(); + + if (GV->isDSOLocal() && isa(GV)) { + if (auto GCM = dyn_cast(GV)->getCodeModel()) + CM = *GCM; + } + + return getAddr(N, DAG, CM, GV->isDSOLocal()); } SDValue LoongArchTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N, SelectionDAG &DAG, - unsigned Opc) const { + unsigned Opc, + bool Large) const { SDLoc DL(N); EVT Ty = getPointerTy(DAG.getDataLayout()); MVT GRLenVT = Subtarget.getGRLenVT(); + // This is not actually used, but is necessary for successfully matching the + // PseudoLA_*_LARGE nodes. + SDValue Tmp = DAG.getConstant(0, DL, Ty); SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0); - SDValue Offset = SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0); + SDValue Offset = Large + ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0) + : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0); // Add the thread pointer. 
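+  // On LoongArch the thread pointer lives in the dedicated register $tp
+  // (i.e. R2), which is what the ADD built below folds in.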
return DAG.getNode(ISD::ADD, DL, Ty, Offset, @@ -520,14 +1763,20 @@ SDValue LoongArchTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N, SDValue LoongArchTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N, SelectionDAG &DAG, - unsigned Opc) const { + unsigned Opc, + bool Large) const { SDLoc DL(N); EVT Ty = getPointerTy(DAG.getDataLayout()); IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits()); + // This is not actually used, but is necessary for successfully matching the + // PseudoLA_*_LARGE nodes. + SDValue Tmp = DAG.getConstant(0, DL, Ty); + // Use a PC-relative addressing mode to access the dynamic GOT address. SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0); - SDValue Load = SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0); + SDValue Load = Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0) + : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0); // Prepare argument list to generate call. ArgListTy Args; @@ -554,6 +1803,9 @@ LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op, CallingConv::GHC) report_fatal_error("In GHC calling convention TLS is not supported"); + bool Large = DAG.getTarget().getCodeModel() == CodeModel::Large; + assert((!Large || Subtarget.is64Bit()) && "Large code model requires LA64"); + GlobalAddressSDNode *N = cast(Op); assert(N->getOffset() == 0 && "unexpected offset in global node"); // OHOS_LOCAL begin @@ -567,20 +1819,31 @@ LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op, // In this model, application code calls the dynamic linker function // __tls_get_addr to locate TLS offsets into the dynamic thread vector at // runtime. - Addr = getDynamicTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_GD); + Addr = getDynamicTLSAddr(N, DAG, + Large ? LoongArch::PseudoLA_TLS_GD_LARGE + : LoongArch::PseudoLA_TLS_GD, + Large); break; case TLSModel::LocalDynamic: // Same as GeneralDynamic, except for assembly modifiers and relocation // records. - Addr = getDynamicTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_LD); + Addr = getDynamicTLSAddr(N, DAG, + Large ? LoongArch::PseudoLA_TLS_LD_LARGE + : LoongArch::PseudoLA_TLS_LD, + Large); break; case TLSModel::InitialExec: // This model uses the GOT to resolve TLS offsets. - Addr = getStaticTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_IE); + Addr = getStaticTLSAddr(N, DAG, + Large ? LoongArch::PseudoLA_TLS_IE_LARGE + : LoongArch::PseudoLA_TLS_IE, + Large); break; case TLSModel::LocalExec: // This model is used when static linking as the TLS offsets are resolved // during program linking. + // + // This node doesn't need an extra argument for the large code model. Addr = getStaticTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_LE); break; } @@ -588,9 +1851,24 @@ LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op, return Addr; } +template +static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp, + SelectionDAG &DAG, bool IsSigned = false) { + auto *CImm = cast(Op->getOperand(ImmOp)); + // Check the ImmArg. + if ((IsSigned && !isInt(CImm->getSExtValue())) || + (!IsSigned && !isUInt(CImm->getZExtValue()))) { + DAG.getContext()->emitError(Op->getOperationName(0) + + ": argument out of range."); + return DAG.getNode(ISD::UNDEF, SDLoc(Op), Op.getValueType()); + } + return SDValue(); +} + SDValue LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const { + SDLoc DL(Op); switch (Op.getConstantOperandVal(0)) { default: return SDValue(); // Don't custom lower most intrinsics. 
@@ -598,6 +1876,271 @@ LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, EVT PtrVT = getPointerTy(DAG.getDataLayout()); return DAG.getRegister(LoongArch::R2, PtrVT); } + case Intrinsic::loongarch_lsx_vpickve2gr_d: + case Intrinsic::loongarch_lsx_vpickve2gr_du: + case Intrinsic::loongarch_lsx_vreplvei_d: + case Intrinsic::loongarch_lasx_xvrepl128vei_d: + return checkIntrinsicImmArg<1>(Op, 2, DAG); + case Intrinsic::loongarch_lsx_vreplvei_w: + case Intrinsic::loongarch_lasx_xvrepl128vei_w: + case Intrinsic::loongarch_lasx_xvpickve2gr_d: + case Intrinsic::loongarch_lasx_xvpickve2gr_du: + case Intrinsic::loongarch_lasx_xvpickve_d: + case Intrinsic::loongarch_lasx_xvpickve_d_f: + return checkIntrinsicImmArg<2>(Op, 2, DAG); + case Intrinsic::loongarch_lasx_xvinsve0_d: + return checkIntrinsicImmArg<2>(Op, 3, DAG); + case Intrinsic::loongarch_lsx_vsat_b: + case Intrinsic::loongarch_lsx_vsat_bu: + case Intrinsic::loongarch_lsx_vrotri_b: + case Intrinsic::loongarch_lsx_vsllwil_h_b: + case Intrinsic::loongarch_lsx_vsllwil_hu_bu: + case Intrinsic::loongarch_lsx_vsrlri_b: + case Intrinsic::loongarch_lsx_vsrari_b: + case Intrinsic::loongarch_lsx_vreplvei_h: + case Intrinsic::loongarch_lasx_xvsat_b: + case Intrinsic::loongarch_lasx_xvsat_bu: + case Intrinsic::loongarch_lasx_xvrotri_b: + case Intrinsic::loongarch_lasx_xvsllwil_h_b: + case Intrinsic::loongarch_lasx_xvsllwil_hu_bu: + case Intrinsic::loongarch_lasx_xvsrlri_b: + case Intrinsic::loongarch_lasx_xvsrari_b: + case Intrinsic::loongarch_lasx_xvrepl128vei_h: + case Intrinsic::loongarch_lasx_xvpickve_w: + case Intrinsic::loongarch_lasx_xvpickve_w_f: + return checkIntrinsicImmArg<3>(Op, 2, DAG); + case Intrinsic::loongarch_lasx_xvinsve0_w: + return checkIntrinsicImmArg<3>(Op, 3, DAG); + case Intrinsic::loongarch_lsx_vsat_h: + case Intrinsic::loongarch_lsx_vsat_hu: + case Intrinsic::loongarch_lsx_vrotri_h: + case Intrinsic::loongarch_lsx_vsllwil_w_h: + case Intrinsic::loongarch_lsx_vsllwil_wu_hu: + case Intrinsic::loongarch_lsx_vsrlri_h: + case Intrinsic::loongarch_lsx_vsrari_h: + case Intrinsic::loongarch_lsx_vreplvei_b: + case Intrinsic::loongarch_lasx_xvsat_h: + case Intrinsic::loongarch_lasx_xvsat_hu: + case Intrinsic::loongarch_lasx_xvrotri_h: + case Intrinsic::loongarch_lasx_xvsllwil_w_h: + case Intrinsic::loongarch_lasx_xvsllwil_wu_hu: + case Intrinsic::loongarch_lasx_xvsrlri_h: + case Intrinsic::loongarch_lasx_xvsrari_h: + case Intrinsic::loongarch_lasx_xvrepl128vei_b: + return checkIntrinsicImmArg<4>(Op, 2, DAG); + case Intrinsic::loongarch_lsx_vsrlni_b_h: + case Intrinsic::loongarch_lsx_vsrani_b_h: + case Intrinsic::loongarch_lsx_vsrlrni_b_h: + case Intrinsic::loongarch_lsx_vsrarni_b_h: + case Intrinsic::loongarch_lsx_vssrlni_b_h: + case Intrinsic::loongarch_lsx_vssrani_b_h: + case Intrinsic::loongarch_lsx_vssrlni_bu_h: + case Intrinsic::loongarch_lsx_vssrani_bu_h: + case Intrinsic::loongarch_lsx_vssrlrni_b_h: + case Intrinsic::loongarch_lsx_vssrarni_b_h: + case Intrinsic::loongarch_lsx_vssrlrni_bu_h: + case Intrinsic::loongarch_lsx_vssrarni_bu_h: + case Intrinsic::loongarch_lasx_xvsrlni_b_h: + case Intrinsic::loongarch_lasx_xvsrani_b_h: + case Intrinsic::loongarch_lasx_xvsrlrni_b_h: + case Intrinsic::loongarch_lasx_xvsrarni_b_h: + case Intrinsic::loongarch_lasx_xvssrlni_b_h: + case Intrinsic::loongarch_lasx_xvssrani_b_h: + case Intrinsic::loongarch_lasx_xvssrlni_bu_h: + case Intrinsic::loongarch_lasx_xvssrani_bu_h: + case Intrinsic::loongarch_lasx_xvssrlrni_b_h: + case Intrinsic::loongarch_lasx_xvssrarni_b_h: + case 
Intrinsic::loongarch_lasx_xvssrlrni_bu_h: + case Intrinsic::loongarch_lasx_xvssrarni_bu_h: + return checkIntrinsicImmArg<4>(Op, 3, DAG); + case Intrinsic::loongarch_lsx_vsat_w: + case Intrinsic::loongarch_lsx_vsat_wu: + case Intrinsic::loongarch_lsx_vrotri_w: + case Intrinsic::loongarch_lsx_vsllwil_d_w: + case Intrinsic::loongarch_lsx_vsllwil_du_wu: + case Intrinsic::loongarch_lsx_vsrlri_w: + case Intrinsic::loongarch_lsx_vsrari_w: + case Intrinsic::loongarch_lsx_vslei_bu: + case Intrinsic::loongarch_lsx_vslei_hu: + case Intrinsic::loongarch_lsx_vslei_wu: + case Intrinsic::loongarch_lsx_vslei_du: + case Intrinsic::loongarch_lsx_vslti_bu: + case Intrinsic::loongarch_lsx_vslti_hu: + case Intrinsic::loongarch_lsx_vslti_wu: + case Intrinsic::loongarch_lsx_vslti_du: + case Intrinsic::loongarch_lsx_vbsll_v: + case Intrinsic::loongarch_lsx_vbsrl_v: + case Intrinsic::loongarch_lasx_xvsat_w: + case Intrinsic::loongarch_lasx_xvsat_wu: + case Intrinsic::loongarch_lasx_xvrotri_w: + case Intrinsic::loongarch_lasx_xvsllwil_d_w: + case Intrinsic::loongarch_lasx_xvsllwil_du_wu: + case Intrinsic::loongarch_lasx_xvsrlri_w: + case Intrinsic::loongarch_lasx_xvsrari_w: + case Intrinsic::loongarch_lasx_xvslei_bu: + case Intrinsic::loongarch_lasx_xvslei_hu: + case Intrinsic::loongarch_lasx_xvslei_wu: + case Intrinsic::loongarch_lasx_xvslei_du: + case Intrinsic::loongarch_lasx_xvslti_bu: + case Intrinsic::loongarch_lasx_xvslti_hu: + case Intrinsic::loongarch_lasx_xvslti_wu: + case Intrinsic::loongarch_lasx_xvslti_du: + case Intrinsic::loongarch_lasx_xvbsll_v: + case Intrinsic::loongarch_lasx_xvbsrl_v: + return checkIntrinsicImmArg<5>(Op, 2, DAG); + case Intrinsic::loongarch_lsx_vseqi_b: + case Intrinsic::loongarch_lsx_vseqi_h: + case Intrinsic::loongarch_lsx_vseqi_w: + case Intrinsic::loongarch_lsx_vseqi_d: + case Intrinsic::loongarch_lsx_vslei_b: + case Intrinsic::loongarch_lsx_vslei_h: + case Intrinsic::loongarch_lsx_vslei_w: + case Intrinsic::loongarch_lsx_vslei_d: + case Intrinsic::loongarch_lsx_vslti_b: + case Intrinsic::loongarch_lsx_vslti_h: + case Intrinsic::loongarch_lsx_vslti_w: + case Intrinsic::loongarch_lsx_vslti_d: + case Intrinsic::loongarch_lasx_xvseqi_b: + case Intrinsic::loongarch_lasx_xvseqi_h: + case Intrinsic::loongarch_lasx_xvseqi_w: + case Intrinsic::loongarch_lasx_xvseqi_d: + case Intrinsic::loongarch_lasx_xvslei_b: + case Intrinsic::loongarch_lasx_xvslei_h: + case Intrinsic::loongarch_lasx_xvslei_w: + case Intrinsic::loongarch_lasx_xvslei_d: + case Intrinsic::loongarch_lasx_xvslti_b: + case Intrinsic::loongarch_lasx_xvslti_h: + case Intrinsic::loongarch_lasx_xvslti_w: + case Intrinsic::loongarch_lasx_xvslti_d: + return checkIntrinsicImmArg<5>(Op, 2, DAG, /*IsSigned=*/true); + case Intrinsic::loongarch_lsx_vsrlni_h_w: + case Intrinsic::loongarch_lsx_vsrani_h_w: + case Intrinsic::loongarch_lsx_vsrlrni_h_w: + case Intrinsic::loongarch_lsx_vsrarni_h_w: + case Intrinsic::loongarch_lsx_vssrlni_h_w: + case Intrinsic::loongarch_lsx_vssrani_h_w: + case Intrinsic::loongarch_lsx_vssrlni_hu_w: + case Intrinsic::loongarch_lsx_vssrani_hu_w: + case Intrinsic::loongarch_lsx_vssrlrni_h_w: + case Intrinsic::loongarch_lsx_vssrarni_h_w: + case Intrinsic::loongarch_lsx_vssrlrni_hu_w: + case Intrinsic::loongarch_lsx_vssrarni_hu_w: + case Intrinsic::loongarch_lsx_vfrstpi_b: + case Intrinsic::loongarch_lsx_vfrstpi_h: + case Intrinsic::loongarch_lasx_xvsrlni_h_w: + case Intrinsic::loongarch_lasx_xvsrani_h_w: + case Intrinsic::loongarch_lasx_xvsrlrni_h_w: + case Intrinsic::loongarch_lasx_xvsrarni_h_w: + case 
Intrinsic::loongarch_lasx_xvssrlni_h_w: + case Intrinsic::loongarch_lasx_xvssrani_h_w: + case Intrinsic::loongarch_lasx_xvssrlni_hu_w: + case Intrinsic::loongarch_lasx_xvssrani_hu_w: + case Intrinsic::loongarch_lasx_xvssrlrni_h_w: + case Intrinsic::loongarch_lasx_xvssrarni_h_w: + case Intrinsic::loongarch_lasx_xvssrlrni_hu_w: + case Intrinsic::loongarch_lasx_xvssrarni_hu_w: + case Intrinsic::loongarch_lasx_xvfrstpi_b: + case Intrinsic::loongarch_lasx_xvfrstpi_h: + return checkIntrinsicImmArg<5>(Op, 3, DAG); + case Intrinsic::loongarch_lsx_vsat_d: + case Intrinsic::loongarch_lsx_vsat_du: + case Intrinsic::loongarch_lsx_vrotri_d: + case Intrinsic::loongarch_lsx_vsrlri_d: + case Intrinsic::loongarch_lsx_vsrari_d: + case Intrinsic::loongarch_lasx_xvsat_d: + case Intrinsic::loongarch_lasx_xvsat_du: + case Intrinsic::loongarch_lasx_xvrotri_d: + case Intrinsic::loongarch_lasx_xvsrlri_d: + case Intrinsic::loongarch_lasx_xvsrari_d: + return checkIntrinsicImmArg<6>(Op, 2, DAG); + case Intrinsic::loongarch_lsx_vsrlni_w_d: + case Intrinsic::loongarch_lsx_vsrani_w_d: + case Intrinsic::loongarch_lsx_vsrlrni_w_d: + case Intrinsic::loongarch_lsx_vsrarni_w_d: + case Intrinsic::loongarch_lsx_vssrlni_w_d: + case Intrinsic::loongarch_lsx_vssrani_w_d: + case Intrinsic::loongarch_lsx_vssrlni_wu_d: + case Intrinsic::loongarch_lsx_vssrani_wu_d: + case Intrinsic::loongarch_lsx_vssrlrni_w_d: + case Intrinsic::loongarch_lsx_vssrarni_w_d: + case Intrinsic::loongarch_lsx_vssrlrni_wu_d: + case Intrinsic::loongarch_lsx_vssrarni_wu_d: + case Intrinsic::loongarch_lasx_xvsrlni_w_d: + case Intrinsic::loongarch_lasx_xvsrani_w_d: + case Intrinsic::loongarch_lasx_xvsrlrni_w_d: + case Intrinsic::loongarch_lasx_xvsrarni_w_d: + case Intrinsic::loongarch_lasx_xvssrlni_w_d: + case Intrinsic::loongarch_lasx_xvssrani_w_d: + case Intrinsic::loongarch_lasx_xvssrlni_wu_d: + case Intrinsic::loongarch_lasx_xvssrani_wu_d: + case Intrinsic::loongarch_lasx_xvssrlrni_w_d: + case Intrinsic::loongarch_lasx_xvssrarni_w_d: + case Intrinsic::loongarch_lasx_xvssrlrni_wu_d: + case Intrinsic::loongarch_lasx_xvssrarni_wu_d: + return checkIntrinsicImmArg<6>(Op, 3, DAG); + case Intrinsic::loongarch_lsx_vsrlni_d_q: + case Intrinsic::loongarch_lsx_vsrani_d_q: + case Intrinsic::loongarch_lsx_vsrlrni_d_q: + case Intrinsic::loongarch_lsx_vsrarni_d_q: + case Intrinsic::loongarch_lsx_vssrlni_d_q: + case Intrinsic::loongarch_lsx_vssrani_d_q: + case Intrinsic::loongarch_lsx_vssrlni_du_q: + case Intrinsic::loongarch_lsx_vssrani_du_q: + case Intrinsic::loongarch_lsx_vssrlrni_d_q: + case Intrinsic::loongarch_lsx_vssrarni_d_q: + case Intrinsic::loongarch_lsx_vssrlrni_du_q: + case Intrinsic::loongarch_lsx_vssrarni_du_q: + case Intrinsic::loongarch_lasx_xvsrlni_d_q: + case Intrinsic::loongarch_lasx_xvsrani_d_q: + case Intrinsic::loongarch_lasx_xvsrlrni_d_q: + case Intrinsic::loongarch_lasx_xvsrarni_d_q: + case Intrinsic::loongarch_lasx_xvssrlni_d_q: + case Intrinsic::loongarch_lasx_xvssrani_d_q: + case Intrinsic::loongarch_lasx_xvssrlni_du_q: + case Intrinsic::loongarch_lasx_xvssrani_du_q: + case Intrinsic::loongarch_lasx_xvssrlrni_d_q: + case Intrinsic::loongarch_lasx_xvssrarni_d_q: + case Intrinsic::loongarch_lasx_xvssrlrni_du_q: + case Intrinsic::loongarch_lasx_xvssrarni_du_q: + return checkIntrinsicImmArg<7>(Op, 3, DAG); + case Intrinsic::loongarch_lsx_vnori_b: + case Intrinsic::loongarch_lsx_vshuf4i_b: + case Intrinsic::loongarch_lsx_vshuf4i_h: + case Intrinsic::loongarch_lsx_vshuf4i_w: + case Intrinsic::loongarch_lasx_xvnori_b: + case 
Intrinsic::loongarch_lasx_xvshuf4i_b: + case Intrinsic::loongarch_lasx_xvshuf4i_h: + case Intrinsic::loongarch_lasx_xvshuf4i_w: + case Intrinsic::loongarch_lasx_xvpermi_d: + return checkIntrinsicImmArg<8>(Op, 2, DAG); + case Intrinsic::loongarch_lsx_vshuf4i_d: + case Intrinsic::loongarch_lsx_vpermi_w: + case Intrinsic::loongarch_lsx_vbitseli_b: + case Intrinsic::loongarch_lsx_vextrins_b: + case Intrinsic::loongarch_lsx_vextrins_h: + case Intrinsic::loongarch_lsx_vextrins_w: + case Intrinsic::loongarch_lsx_vextrins_d: + case Intrinsic::loongarch_lasx_xvshuf4i_d: + case Intrinsic::loongarch_lasx_xvpermi_w: + case Intrinsic::loongarch_lasx_xvpermi_q: + case Intrinsic::loongarch_lasx_xvbitseli_b: + case Intrinsic::loongarch_lasx_xvextrins_b: + case Intrinsic::loongarch_lasx_xvextrins_h: + case Intrinsic::loongarch_lasx_xvextrins_w: + case Intrinsic::loongarch_lasx_xvextrins_d: + return checkIntrinsicImmArg<8>(Op, 3, DAG); + case Intrinsic::loongarch_lsx_vrepli_b: + case Intrinsic::loongarch_lsx_vrepli_h: + case Intrinsic::loongarch_lsx_vrepli_w: + case Intrinsic::loongarch_lsx_vrepli_d: + case Intrinsic::loongarch_lasx_xvrepli_b: + case Intrinsic::loongarch_lasx_xvrepli_h: + case Intrinsic::loongarch_lasx_xvrepli_w: + case Intrinsic::loongarch_lasx_xvrepli_d: + return checkIntrinsicImmArg<10>(Op, 1, DAG, /*IsSigned=*/true); + case Intrinsic::loongarch_lsx_vldi: + case Intrinsic::loongarch_lasx_xvldi: + return checkIntrinsicImmArg<13>(Op, 1, DAG, /*IsSigned=*/true); } } @@ -693,6 +2236,34 @@ LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op, : DAG.getNode(LoongArchISD::MOVFCSR2GR, DL, {VT, MVT::Other}, {Chain, DAG.getConstant(Imm, DL, GRLenVT)}); } + case Intrinsic::loongarch_lsx_vld: + case Intrinsic::loongarch_lsx_vldrepl_b: + case Intrinsic::loongarch_lasx_xvld: + case Intrinsic::loongarch_lasx_xvldrepl_b: + return !isInt<12>(cast(Op.getOperand(3))->getSExtValue()) + ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG) + : SDValue(); + case Intrinsic::loongarch_lsx_vldrepl_h: + case Intrinsic::loongarch_lasx_xvldrepl_h: + return !isShiftedInt<11, 1>( + cast(Op.getOperand(3))->getSExtValue()) + ? emitIntrinsicWithChainErrorMessage( + Op, "argument out of range or not a multiple of 2", DAG) + : SDValue(); + case Intrinsic::loongarch_lsx_vldrepl_w: + case Intrinsic::loongarch_lasx_xvldrepl_w: + return !isShiftedInt<10, 2>( + cast(Op.getOperand(3))->getSExtValue()) + ? emitIntrinsicWithChainErrorMessage( + Op, "argument out of range or not a multiple of 4", DAG) + : SDValue(); + case Intrinsic::loongarch_lsx_vldrepl_d: + case Intrinsic::loongarch_lasx_xvldrepl_d: + return !isShiftedInt<9, 3>( + cast(Op.getOperand(3))->getSExtValue()) + ? emitIntrinsicWithChainErrorMessage( + Op, "argument out of range or not a multiple of 8", DAG) + : SDValue(); } } @@ -811,6 +2382,63 @@ SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op, : !isUInt<8>(Imm) ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG) : Op; } + case Intrinsic::loongarch_lsx_vst: + case Intrinsic::loongarch_lasx_xvst: + return !isInt<12>(cast(Op.getOperand(4))->getSExtValue()) + ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG) + : SDValue(); + case Intrinsic::loongarch_lasx_xvstelm_b: + return (!isInt<8>(cast(Op.getOperand(4))->getSExtValue()) || + !isUInt<5>(cast(Op.getOperand(5))->getZExtValue())) + ? 
emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG) + : SDValue(); + case Intrinsic::loongarch_lsx_vstelm_b: + return (!isInt<8>(cast(Op.getOperand(4))->getSExtValue()) || + !isUInt<4>(cast(Op.getOperand(5))->getZExtValue())) + ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG) + : SDValue(); + case Intrinsic::loongarch_lasx_xvstelm_h: + return (!isShiftedInt<8, 1>( + cast(Op.getOperand(4))->getSExtValue()) || + !isUInt<4>(cast(Op.getOperand(5))->getZExtValue())) + ? emitIntrinsicErrorMessage( + Op, "argument out of range or not a multiple of 2", DAG) + : SDValue(); + case Intrinsic::loongarch_lsx_vstelm_h: + return (!isShiftedInt<8, 1>( + cast(Op.getOperand(4))->getSExtValue()) || + !isUInt<3>(cast(Op.getOperand(5))->getZExtValue())) + ? emitIntrinsicErrorMessage( + Op, "argument out of range or not a multiple of 2", DAG) + : SDValue(); + case Intrinsic::loongarch_lasx_xvstelm_w: + return (!isShiftedInt<8, 2>( + cast(Op.getOperand(4))->getSExtValue()) || + !isUInt<3>(cast(Op.getOperand(5))->getZExtValue())) + ? emitIntrinsicErrorMessage( + Op, "argument out of range or not a multiple of 4", DAG) + : SDValue(); + case Intrinsic::loongarch_lsx_vstelm_w: + return (!isShiftedInt<8, 2>( + cast(Op.getOperand(4))->getSExtValue()) || + !isUInt<2>(cast(Op.getOperand(5))->getZExtValue())) + ? emitIntrinsicErrorMessage( + Op, "argument out of range or not a multiple of 4", DAG) + : SDValue(); + case Intrinsic::loongarch_lasx_xvstelm_d: + return (!isShiftedInt<8, 3>( + cast(Op.getOperand(4))->getSExtValue()) || + !isUInt<2>(cast(Op.getOperand(5))->getZExtValue())) + ? emitIntrinsicErrorMessage( + Op, "argument out of range or not a multiple of 8", DAG) + : SDValue(); + case Intrinsic::loongarch_lsx_vstelm_d: + return (!isShiftedInt<8, 3>( + cast(Op.getOperand(4))->getSExtValue()) || + !isUInt<1>(cast(Op.getOperand(5))->getZExtValue())) + ? emitIntrinsicErrorMessage( + Op, "argument out of range or not a multiple of 8", DAG) + : SDValue(); } } @@ -962,16 +2590,122 @@ static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp, return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes); } -// Helper function that emits error message for intrinsics with chain and return -// a UNDEF and the chain as the results. -static void emitErrorAndReplaceIntrinsicWithChainResults( +// Helper function that emits error message for intrinsics with/without chain +// and return a UNDEF or and the chain as the results. 
+static void emitErrorAndReplaceIntrinsicResults(
     SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG,
-    StringRef ErrorMsg) {
+    StringRef ErrorMsg, bool WithChain = true) {
   DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + ".");
   Results.push_back(DAG.getUNDEF(N->getValueType(0)));
+  if (!WithChain)
+    return;
   Results.push_back(N->getOperand(0));
 }
+
+template <unsigned N>
+static void
+replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl<SDValue> &Results,
+                         SelectionDAG &DAG, const LoongArchSubtarget &Subtarget,
+                         unsigned ResOp) {
+  const StringRef ErrorMsgOOR = "argument out of range";
+  unsigned Imm = cast<ConstantSDNode>(Node->getOperand(2))->getZExtValue();
+  if (!isUInt<N>(Imm)) {
+    emitErrorAndReplaceIntrinsicResults(Node, Results, DAG, ErrorMsgOOR,
+                                        /*WithChain=*/false);
+    return;
+  }
+  SDLoc DL(Node);
+  SDValue Vec = Node->getOperand(1);
+
+  SDValue PickElt =
+      DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec,
+                  DAG.getConstant(Imm, DL, Subtarget.getGRLenVT()),
+                  DAG.getValueType(Vec.getValueType().getVectorElementType()));
+  Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, Node->getValueType(0),
+                                PickElt.getValue(0)));
+}
+
+static void replaceVecCondBranchResults(SDNode *N,
+                                        SmallVectorImpl<SDValue> &Results,
+                                        SelectionDAG &DAG,
+                                        const LoongArchSubtarget &Subtarget,
+                                        unsigned ResOp) {
+  SDLoc DL(N);
+  SDValue Vec = N->getOperand(1);
+
+  SDValue CB = DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec);
+  Results.push_back(
+      DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), CB.getValue(0)));
+}
+
+static void
+replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
+                                 SelectionDAG &DAG,
+                                 const LoongArchSubtarget &Subtarget) {
+  switch (N->getConstantOperandVal(0)) {
+  default:
+    llvm_unreachable("Unexpected Intrinsic.");
+  case Intrinsic::loongarch_lsx_vpickve2gr_b:
+    replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
+                                LoongArchISD::VPICK_SEXT_ELT);
+    break;
+  case Intrinsic::loongarch_lsx_vpickve2gr_h:
+  case Intrinsic::loongarch_lasx_xvpickve2gr_w:
+    replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
+                                LoongArchISD::VPICK_SEXT_ELT);
+    break;
+  case Intrinsic::loongarch_lsx_vpickve2gr_w:
+    replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
+                                LoongArchISD::VPICK_SEXT_ELT);
+    break;
+  case Intrinsic::loongarch_lsx_vpickve2gr_bu:
+    replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
+                                LoongArchISD::VPICK_ZEXT_ELT);
+    break;
+  case Intrinsic::loongarch_lsx_vpickve2gr_hu:
+  case Intrinsic::loongarch_lasx_xvpickve2gr_wu:
+    replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
+                                LoongArchISD::VPICK_ZEXT_ELT);
+    break;
+  case Intrinsic::loongarch_lsx_vpickve2gr_wu:
+    replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
+                                LoongArchISD::VPICK_ZEXT_ELT);
+    break;
+  case Intrinsic::loongarch_lsx_bz_b:
+  case Intrinsic::loongarch_lsx_bz_h:
+  case Intrinsic::loongarch_lsx_bz_w:
+  case Intrinsic::loongarch_lsx_bz_d:
+  case Intrinsic::loongarch_lasx_xbz_b:
+  case Intrinsic::loongarch_lasx_xbz_h:
+  case Intrinsic::loongarch_lasx_xbz_w:
+  case Intrinsic::loongarch_lasx_xbz_d:
+    replaceVecCondBranchResults(N, Results, DAG, Subtarget,
+                                LoongArchISD::VALL_ZERO);
+    break;
+  case Intrinsic::loongarch_lsx_bz_v:
+  case Intrinsic::loongarch_lasx_xbz_v:
+    replaceVecCondBranchResults(N, Results, DAG, Subtarget,
+                                LoongArchISD::VANY_ZERO);
+    break;
+  case Intrinsic::loongarch_lsx_bnz_b:
+  case Intrinsic::loongarch_lsx_bnz_h:
+  case Intrinsic::loongarch_lsx_bnz_w:
+  case Intrinsic::loongarch_lsx_bnz_d:
+  case Intrinsic::loongarch_lasx_xbnz_b:
+  case Intrinsic::loongarch_lasx_xbnz_h:
case Intrinsic::loongarch_lasx_xbnz_w: + case Intrinsic::loongarch_lasx_xbnz_d: + replaceVecCondBranchResults(N, Results, DAG, Subtarget, + LoongArchISD::VALL_NONZERO); + break; + case Intrinsic::loongarch_lsx_bnz_v: + case Intrinsic::loongarch_lasx_xbnz_v: + replaceVecCondBranchResults(N, Results, DAG, Subtarget, + LoongArchISD::VANY_NONZERO); + break; + } +} + void LoongArchTargetLowering::ReplaceNodeResults( SDNode *N, SmallVectorImpl &Results, SelectionDAG &DAG) const { SDLoc DL(N); @@ -1104,14 +2838,12 @@ void LoongArchTargetLowering::ReplaceNodeResults( llvm_unreachable("Unexpected Intrinsic."); case Intrinsic::loongarch_movfcsr2gr: { if (!Subtarget.hasBasicF()) { - emitErrorAndReplaceIntrinsicWithChainResults(N, Results, DAG, - ErrorMsgReqF); + emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqF); return; } unsigned Imm = cast(Op2)->getZExtValue(); if (!isUInt<2>(Imm)) { - emitErrorAndReplaceIntrinsicWithChainResults(N, Results, DAG, - ErrorMsgOOR); + emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR); return; } SDValue MOVFCSR2GRResults = DAG.getNode( @@ -1147,7 +2879,7 @@ void LoongArchTargetLowering::ReplaceNodeResults( {Chain, Op2, \ DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \ Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \ - Results.push_back(NODE.getValue(1)); \ + Results.push_back(NODE.getValue(1)); \ break; \ } CRC_CASE_EXT_UNARYOP(crc_w_d_w, CRC_W_D_W) @@ -1156,8 +2888,7 @@ void LoongArchTargetLowering::ReplaceNodeResults( #define CSR_CASE(ID) \ case Intrinsic::loongarch_##ID: { \ if (!Subtarget.is64Bit()) \ - emitErrorAndReplaceIntrinsicWithChainResults(N, Results, DAG, \ - ErrorMsgReqLA64); \ + emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64); \ break; \ } CSR_CASE(csrrd_d); @@ -1168,8 +2899,7 @@ void LoongArchTargetLowering::ReplaceNodeResults( case Intrinsic::loongarch_csrrd_w: { unsigned Imm = cast(Op2)->getZExtValue(); if (!isUInt<14>(Imm)) { - emitErrorAndReplaceIntrinsicWithChainResults(N, Results, DAG, - ErrorMsgOOR); + emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR); return; } SDValue CSRRDResults = @@ -1183,8 +2913,7 @@ void LoongArchTargetLowering::ReplaceNodeResults( case Intrinsic::loongarch_csrwr_w: { unsigned Imm = cast(N->getOperand(3))->getZExtValue(); if (!isUInt<14>(Imm)) { - emitErrorAndReplaceIntrinsicWithChainResults(N, Results, DAG, - ErrorMsgOOR); + emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR); return; } SDValue CSRWRResults = @@ -1199,8 +2928,7 @@ void LoongArchTargetLowering::ReplaceNodeResults( case Intrinsic::loongarch_csrxchg_w: { unsigned Imm = cast(N->getOperand(4))->getZExtValue(); if (!isUInt<14>(Imm)) { - emitErrorAndReplaceIntrinsicWithChainResults(N, Results, DAG, - ErrorMsgOOR); + emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR); return; } SDValue CSRXCHGResults = DAG.getNode( @@ -1238,8 +2966,7 @@ void LoongArchTargetLowering::ReplaceNodeResults( } case Intrinsic::loongarch_lddir_d: { if (!Subtarget.is64Bit()) { - emitErrorAndReplaceIntrinsicWithChainResults(N, Results, DAG, - ErrorMsgReqLA64); + emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64); return; } break; @@ -1258,6 +2985,10 @@ void LoongArchTargetLowering::ReplaceNodeResults( Results.push_back(N->getOperand(0)); break; } + case ISD::INTRINSIC_WO_CHAIN: { + replaceINTRINSIC_WO_CHAINResults(N, Results, DAG, Subtarget); + break; + } } } @@ -1486,7 +3217,9 @@ Retry: return DAG.getNode( LoongArchISD::BSTRINS, 
DL, ValTy, N0.getOperand(0), DAG.getConstant(CN1->getSExtValue() >> MaskIdx0, DL, ValTy), - DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT), + DAG.getConstant(ValBits == 32 ? (MaskIdx0 + (MaskLen0 & 31) - 1) + : (MaskIdx0 + MaskLen0 - 1), + DL, GRLenVT), DAG.getConstant(MaskIdx0, DL, GRLenVT)); } @@ -1598,6 +3331,608 @@ static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG, Src.getOperand(0)); } +template +static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp, + SelectionDAG &DAG, + const LoongArchSubtarget &Subtarget, + bool IsSigned = false) { + SDLoc DL(Node); + auto *CImm = cast(Node->getOperand(ImmOp)); + // Check the ImmArg. + if ((IsSigned && !isInt(CImm->getSExtValue())) || + (!IsSigned && !isUInt(CImm->getZExtValue()))) { + DAG.getContext()->emitError(Node->getOperationName(0) + + ": argument out of range."); + return DAG.getNode(ISD::UNDEF, DL, Subtarget.getGRLenVT()); + } + return DAG.getConstant(CImm->getZExtValue(), DL, Subtarget.getGRLenVT()); +} + +template +static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp, + SelectionDAG &DAG, bool IsSigned = false) { + SDLoc DL(Node); + EVT ResTy = Node->getValueType(0); + auto *CImm = cast(Node->getOperand(ImmOp)); + + // Check the ImmArg. + if ((IsSigned && !isInt(CImm->getSExtValue())) || + (!IsSigned && !isUInt(CImm->getZExtValue()))) { + DAG.getContext()->emitError(Node->getOperationName(0) + + ": argument out of range."); + return DAG.getNode(ISD::UNDEF, DL, ResTy); + } + return DAG.getConstant( + APInt(ResTy.getScalarType().getSizeInBits(), + IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned), + DL, ResTy); +} + +static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG) { + SDLoc DL(Node); + EVT ResTy = Node->getValueType(0); + SDValue Vec = Node->getOperand(2); + SDValue Mask = DAG.getConstant(Vec.getScalarValueSizeInBits() - 1, DL, ResTy); + return DAG.getNode(ISD::AND, DL, ResTy, Vec, Mask); +} + +static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG) { + SDLoc DL(Node); + EVT ResTy = Node->getValueType(0); + SDValue One = DAG.getConstant(1, DL, ResTy); + SDValue Bit = + DAG.getNode(ISD::SHL, DL, ResTy, One, truncateVecElts(Node, DAG)); + + return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1), + DAG.getNOT(DL, Bit, ResTy)); +} + +template +static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG) { + SDLoc DL(Node); + EVT ResTy = Node->getValueType(0); + auto *CImm = cast(Node->getOperand(2)); + // Check the unsigned ImmArg. + if (!isUInt(CImm->getZExtValue())) { + DAG.getContext()->emitError(Node->getOperationName(0) + + ": argument out of range."); + return DAG.getNode(ISD::UNDEF, DL, ResTy); + } + + APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue(); + SDValue Mask = DAG.getConstant(~BitImm, DL, ResTy); + + return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1), Mask); +} + +template +static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG) { + SDLoc DL(Node); + EVT ResTy = Node->getValueType(0); + auto *CImm = cast(Node->getOperand(2)); + // Check the unsigned ImmArg. 
+ if (!isUInt<N>(CImm->getZExtValue())) { + DAG.getContext()->emitError(Node->getOperationName(0) + + ": argument out of range."); + return DAG.getNode(ISD::UNDEF, DL, ResTy); + } + + APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue(); + SDValue BitImm = DAG.getConstant(Imm, DL, ResTy); + return DAG.getNode(ISD::OR, DL, ResTy, Node->getOperand(1), BitImm); +} + +template <unsigned N> +static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG) { + SDLoc DL(Node); + EVT ResTy = Node->getValueType(0); + auto *CImm = cast<ConstantSDNode>(Node->getOperand(2)); + // Check the unsigned ImmArg. + if (!isUInt<N>(CImm->getZExtValue())) { + DAG.getContext()->emitError(Node->getOperationName(0) + + ": argument out of range."); + return DAG.getNode(ISD::UNDEF, DL, ResTy); + } + + APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue(); + SDValue BitImm = DAG.getConstant(Imm, DL, ResTy); + return DAG.getNode(ISD::XOR, DL, ResTy, Node->getOperand(1), BitImm); +} + +static SDValue +performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const LoongArchSubtarget &Subtarget) { + SDLoc DL(N); + switch (N->getConstantOperandVal(0)) { + default: + break; + case Intrinsic::loongarch_lsx_vadd_b: + case Intrinsic::loongarch_lsx_vadd_h: + case Intrinsic::loongarch_lsx_vadd_w: + case Intrinsic::loongarch_lsx_vadd_d: + case Intrinsic::loongarch_lasx_xvadd_b: + case Intrinsic::loongarch_lasx_xvadd_h: + case Intrinsic::loongarch_lasx_xvadd_w: + case Intrinsic::loongarch_lasx_xvadd_d: + return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1), + N->getOperand(2)); + case Intrinsic::loongarch_lsx_vaddi_bu: + case Intrinsic::loongarch_lsx_vaddi_hu: + case Intrinsic::loongarch_lsx_vaddi_wu: + case Intrinsic::loongarch_lsx_vaddi_du: + case Intrinsic::loongarch_lasx_xvaddi_bu: + case Intrinsic::loongarch_lasx_xvaddi_hu: + case Intrinsic::loongarch_lasx_xvaddi_wu: + case Intrinsic::loongarch_lasx_xvaddi_du: + return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1), + lowerVectorSplatImm<5>(N, 2, DAG)); + case Intrinsic::loongarch_lsx_vsub_b: + case Intrinsic::loongarch_lsx_vsub_h: + case Intrinsic::loongarch_lsx_vsub_w: + case Intrinsic::loongarch_lsx_vsub_d: + case Intrinsic::loongarch_lasx_xvsub_b: + case Intrinsic::loongarch_lasx_xvsub_h: + case Intrinsic::loongarch_lasx_xvsub_w: + case Intrinsic::loongarch_lasx_xvsub_d: + return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1), + N->getOperand(2)); + case Intrinsic::loongarch_lsx_vsubi_bu: + case Intrinsic::loongarch_lsx_vsubi_hu: + case Intrinsic::loongarch_lsx_vsubi_wu: + case Intrinsic::loongarch_lsx_vsubi_du: + case Intrinsic::loongarch_lasx_xvsubi_bu: + case Intrinsic::loongarch_lasx_xvsubi_hu: + case Intrinsic::loongarch_lasx_xvsubi_wu: + case Intrinsic::loongarch_lasx_xvsubi_du: + return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1), + lowerVectorSplatImm<5>(N, 2, DAG)); + case Intrinsic::loongarch_lsx_vneg_b: + case Intrinsic::loongarch_lsx_vneg_h: + case Intrinsic::loongarch_lsx_vneg_w: + case Intrinsic::loongarch_lsx_vneg_d: + case Intrinsic::loongarch_lasx_xvneg_b: + case Intrinsic::loongarch_lasx_xvneg_h: + case Intrinsic::loongarch_lasx_xvneg_w: + case Intrinsic::loongarch_lasx_xvneg_d: + return DAG.getNode( + ISD::SUB, DL, N->getValueType(0), + DAG.getConstant( + APInt(N->getValueType(0).getScalarType().getSizeInBits(), 0, + /*isSigned=*/true), + SDLoc(N), N->getValueType(0)), + N->getOperand(1)); + case
Intrinsic::loongarch_lsx_vmax_b: + case Intrinsic::loongarch_lsx_vmax_h: + case Intrinsic::loongarch_lsx_vmax_w: + case Intrinsic::loongarch_lsx_vmax_d: + case Intrinsic::loongarch_lasx_xvmax_b: + case Intrinsic::loongarch_lasx_xvmax_h: + case Intrinsic::loongarch_lasx_xvmax_w: + case Intrinsic::loongarch_lasx_xvmax_d: + return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1), + N->getOperand(2)); + case Intrinsic::loongarch_lsx_vmax_bu: + case Intrinsic::loongarch_lsx_vmax_hu: + case Intrinsic::loongarch_lsx_vmax_wu: + case Intrinsic::loongarch_lsx_vmax_du: + case Intrinsic::loongarch_lasx_xvmax_bu: + case Intrinsic::loongarch_lasx_xvmax_hu: + case Intrinsic::loongarch_lasx_xvmax_wu: + case Intrinsic::loongarch_lasx_xvmax_du: + return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1), + N->getOperand(2)); + case Intrinsic::loongarch_lsx_vmaxi_b: + case Intrinsic::loongarch_lsx_vmaxi_h: + case Intrinsic::loongarch_lsx_vmaxi_w: + case Intrinsic::loongarch_lsx_vmaxi_d: + case Intrinsic::loongarch_lasx_xvmaxi_b: + case Intrinsic::loongarch_lasx_xvmaxi_h: + case Intrinsic::loongarch_lasx_xvmaxi_w: + case Intrinsic::loongarch_lasx_xvmaxi_d: + return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1), + lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true)); + case Intrinsic::loongarch_lsx_vmaxi_bu: + case Intrinsic::loongarch_lsx_vmaxi_hu: + case Intrinsic::loongarch_lsx_vmaxi_wu: + case Intrinsic::loongarch_lsx_vmaxi_du: + case Intrinsic::loongarch_lasx_xvmaxi_bu: + case Intrinsic::loongarch_lasx_xvmaxi_hu: + case Intrinsic::loongarch_lasx_xvmaxi_wu: + case Intrinsic::loongarch_lasx_xvmaxi_du: + return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1), + lowerVectorSplatImm<5>(N, 2, DAG)); + case Intrinsic::loongarch_lsx_vmin_b: + case Intrinsic::loongarch_lsx_vmin_h: + case Intrinsic::loongarch_lsx_vmin_w: + case Intrinsic::loongarch_lsx_vmin_d: + case Intrinsic::loongarch_lasx_xvmin_b: + case Intrinsic::loongarch_lasx_xvmin_h: + case Intrinsic::loongarch_lasx_xvmin_w: + case Intrinsic::loongarch_lasx_xvmin_d: + return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1), + N->getOperand(2)); + case Intrinsic::loongarch_lsx_vmin_bu: + case Intrinsic::loongarch_lsx_vmin_hu: + case Intrinsic::loongarch_lsx_vmin_wu: + case Intrinsic::loongarch_lsx_vmin_du: + case Intrinsic::loongarch_lasx_xvmin_bu: + case Intrinsic::loongarch_lasx_xvmin_hu: + case Intrinsic::loongarch_lasx_xvmin_wu: + case Intrinsic::loongarch_lasx_xvmin_du: + return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1), + N->getOperand(2)); + case Intrinsic::loongarch_lsx_vmini_b: + case Intrinsic::loongarch_lsx_vmini_h: + case Intrinsic::loongarch_lsx_vmini_w: + case Intrinsic::loongarch_lsx_vmini_d: + case Intrinsic::loongarch_lasx_xvmini_b: + case Intrinsic::loongarch_lasx_xvmini_h: + case Intrinsic::loongarch_lasx_xvmini_w: + case Intrinsic::loongarch_lasx_xvmini_d: + return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1), + lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true)); + case Intrinsic::loongarch_lsx_vmini_bu: + case Intrinsic::loongarch_lsx_vmini_hu: + case Intrinsic::loongarch_lsx_vmini_wu: + case Intrinsic::loongarch_lsx_vmini_du: + case Intrinsic::loongarch_lasx_xvmini_bu: + case Intrinsic::loongarch_lasx_xvmini_hu: + case Intrinsic::loongarch_lasx_xvmini_wu: + case Intrinsic::loongarch_lasx_xvmini_du: + return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1), + lowerVectorSplatImm<5>(N, 2, DAG)); + case 
Intrinsic::loongarch_lsx_vmul_b: + case Intrinsic::loongarch_lsx_vmul_h: + case Intrinsic::loongarch_lsx_vmul_w: + case Intrinsic::loongarch_lsx_vmul_d: + case Intrinsic::loongarch_lasx_xvmul_b: + case Intrinsic::loongarch_lasx_xvmul_h: + case Intrinsic::loongarch_lasx_xvmul_w: + case Intrinsic::loongarch_lasx_xvmul_d: + return DAG.getNode(ISD::MUL, DL, N->getValueType(0), N->getOperand(1), + N->getOperand(2)); + case Intrinsic::loongarch_lsx_vmadd_b: + case Intrinsic::loongarch_lsx_vmadd_h: + case Intrinsic::loongarch_lsx_vmadd_w: + case Intrinsic::loongarch_lsx_vmadd_d: + case Intrinsic::loongarch_lasx_xvmadd_b: + case Intrinsic::loongarch_lasx_xvmadd_h: + case Intrinsic::loongarch_lasx_xvmadd_w: + case Intrinsic::loongarch_lasx_xvmadd_d: { + EVT ResTy = N->getValueType(0); + return DAG.getNode(ISD::ADD, SDLoc(N), ResTy, N->getOperand(1), + DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2), + N->getOperand(3))); + } + case Intrinsic::loongarch_lsx_vmsub_b: + case Intrinsic::loongarch_lsx_vmsub_h: + case Intrinsic::loongarch_lsx_vmsub_w: + case Intrinsic::loongarch_lsx_vmsub_d: + case Intrinsic::loongarch_lasx_xvmsub_b: + case Intrinsic::loongarch_lasx_xvmsub_h: + case Intrinsic::loongarch_lasx_xvmsub_w: + case Intrinsic::loongarch_lasx_xvmsub_d: { + EVT ResTy = N->getValueType(0); + return DAG.getNode(ISD::SUB, SDLoc(N), ResTy, N->getOperand(1), + DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2), + N->getOperand(3))); + } + case Intrinsic::loongarch_lsx_vdiv_b: + case Intrinsic::loongarch_lsx_vdiv_h: + case Intrinsic::loongarch_lsx_vdiv_w: + case Intrinsic::loongarch_lsx_vdiv_d: + case Intrinsic::loongarch_lasx_xvdiv_b: + case Intrinsic::loongarch_lasx_xvdiv_h: + case Intrinsic::loongarch_lasx_xvdiv_w: + case Intrinsic::loongarch_lasx_xvdiv_d: + return DAG.getNode(ISD::SDIV, DL, N->getValueType(0), N->getOperand(1), + N->getOperand(2)); + case Intrinsic::loongarch_lsx_vdiv_bu: + case Intrinsic::loongarch_lsx_vdiv_hu: + case Intrinsic::loongarch_lsx_vdiv_wu: + case Intrinsic::loongarch_lsx_vdiv_du: + case Intrinsic::loongarch_lasx_xvdiv_bu: + case Intrinsic::loongarch_lasx_xvdiv_hu: + case Intrinsic::loongarch_lasx_xvdiv_wu: + case Intrinsic::loongarch_lasx_xvdiv_du: + return DAG.getNode(ISD::UDIV, DL, N->getValueType(0), N->getOperand(1), + N->getOperand(2)); + case Intrinsic::loongarch_lsx_vmod_b: + case Intrinsic::loongarch_lsx_vmod_h: + case Intrinsic::loongarch_lsx_vmod_w: + case Intrinsic::loongarch_lsx_vmod_d: + case Intrinsic::loongarch_lasx_xvmod_b: + case Intrinsic::loongarch_lasx_xvmod_h: + case Intrinsic::loongarch_lasx_xvmod_w: + case Intrinsic::loongarch_lasx_xvmod_d: + return DAG.getNode(ISD::SREM, DL, N->getValueType(0), N->getOperand(1), + N->getOperand(2)); + case Intrinsic::loongarch_lsx_vmod_bu: + case Intrinsic::loongarch_lsx_vmod_hu: + case Intrinsic::loongarch_lsx_vmod_wu: + case Intrinsic::loongarch_lsx_vmod_du: + case Intrinsic::loongarch_lasx_xvmod_bu: + case Intrinsic::loongarch_lasx_xvmod_hu: + case Intrinsic::loongarch_lasx_xvmod_wu: + case Intrinsic::loongarch_lasx_xvmod_du: + return DAG.getNode(ISD::UREM, DL, N->getValueType(0), N->getOperand(1), + N->getOperand(2)); + case Intrinsic::loongarch_lsx_vand_v: + case Intrinsic::loongarch_lasx_xvand_v: + return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1), + N->getOperand(2)); + case Intrinsic::loongarch_lsx_vor_v: + case Intrinsic::loongarch_lasx_xvor_v: + return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1), + N->getOperand(2)); + case 
Intrinsic::loongarch_lsx_vxor_v: + case Intrinsic::loongarch_lasx_xvxor_v: + return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1), + N->getOperand(2)); + case Intrinsic::loongarch_lsx_vnor_v: + case Intrinsic::loongarch_lasx_xvnor_v: { + SDValue Res = DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1), + N->getOperand(2)); + return DAG.getNOT(DL, Res, Res->getValueType(0)); + } + case Intrinsic::loongarch_lsx_vandi_b: + case Intrinsic::loongarch_lasx_xvandi_b: + return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1), + lowerVectorSplatImm<8>(N, 2, DAG)); + case Intrinsic::loongarch_lsx_vori_b: + case Intrinsic::loongarch_lasx_xvori_b: + return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1), + lowerVectorSplatImm<8>(N, 2, DAG)); + case Intrinsic::loongarch_lsx_vxori_b: + case Intrinsic::loongarch_lasx_xvxori_b: + return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1), + lowerVectorSplatImm<8>(N, 2, DAG)); + case Intrinsic::loongarch_lsx_vsll_b: + case Intrinsic::loongarch_lsx_vsll_h: + case Intrinsic::loongarch_lsx_vsll_w: + case Intrinsic::loongarch_lsx_vsll_d: + case Intrinsic::loongarch_lasx_xvsll_b: + case Intrinsic::loongarch_lasx_xvsll_h: + case Intrinsic::loongarch_lasx_xvsll_w: + case Intrinsic::loongarch_lasx_xvsll_d: + return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1), + truncateVecElts(N, DAG)); + case Intrinsic::loongarch_lsx_vslli_b: + case Intrinsic::loongarch_lasx_xvslli_b: + return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1), + lowerVectorSplatImm<3>(N, 2, DAG)); + case Intrinsic::loongarch_lsx_vslli_h: + case Intrinsic::loongarch_lasx_xvslli_h: + return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1), + lowerVectorSplatImm<4>(N, 2, DAG)); + case Intrinsic::loongarch_lsx_vslli_w: + case Intrinsic::loongarch_lasx_xvslli_w: + return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1), + lowerVectorSplatImm<5>(N, 2, DAG)); + case Intrinsic::loongarch_lsx_vslli_d: + case Intrinsic::loongarch_lasx_xvslli_d: + return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1), + lowerVectorSplatImm<6>(N, 2, DAG)); + case Intrinsic::loongarch_lsx_vsrl_b: + case Intrinsic::loongarch_lsx_vsrl_h: + case Intrinsic::loongarch_lsx_vsrl_w: + case Intrinsic::loongarch_lsx_vsrl_d: + case Intrinsic::loongarch_lasx_xvsrl_b: + case Intrinsic::loongarch_lasx_xvsrl_h: + case Intrinsic::loongarch_lasx_xvsrl_w: + case Intrinsic::loongarch_lasx_xvsrl_d: + return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1), + truncateVecElts(N, DAG)); + case Intrinsic::loongarch_lsx_vsrli_b: + case Intrinsic::loongarch_lasx_xvsrli_b: + return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1), + lowerVectorSplatImm<3>(N, 2, DAG)); + case Intrinsic::loongarch_lsx_vsrli_h: + case Intrinsic::loongarch_lasx_xvsrli_h: + return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1), + lowerVectorSplatImm<4>(N, 2, DAG)); + case Intrinsic::loongarch_lsx_vsrli_w: + case Intrinsic::loongarch_lasx_xvsrli_w: + return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1), + lowerVectorSplatImm<5>(N, 2, DAG)); + case Intrinsic::loongarch_lsx_vsrli_d: + case Intrinsic::loongarch_lasx_xvsrli_d: + return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1), + lowerVectorSplatImm<6>(N, 2, DAG)); + case Intrinsic::loongarch_lsx_vsra_b: + case Intrinsic::loongarch_lsx_vsra_h: + case Intrinsic::loongarch_lsx_vsra_w: + case 
Intrinsic::loongarch_lsx_vsra_d: + case Intrinsic::loongarch_lasx_xvsra_b: + case Intrinsic::loongarch_lasx_xvsra_h: + case Intrinsic::loongarch_lasx_xvsra_w: + case Intrinsic::loongarch_lasx_xvsra_d: + return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1), + truncateVecElts(N, DAG)); + case Intrinsic::loongarch_lsx_vsrai_b: + case Intrinsic::loongarch_lasx_xvsrai_b: + return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1), + lowerVectorSplatImm<3>(N, 2, DAG)); + case Intrinsic::loongarch_lsx_vsrai_h: + case Intrinsic::loongarch_lasx_xvsrai_h: + return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1), + lowerVectorSplatImm<4>(N, 2, DAG)); + case Intrinsic::loongarch_lsx_vsrai_w: + case Intrinsic::loongarch_lasx_xvsrai_w: + return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1), + lowerVectorSplatImm<5>(N, 2, DAG)); + case Intrinsic::loongarch_lsx_vsrai_d: + case Intrinsic::loongarch_lasx_xvsrai_d: + return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1), + lowerVectorSplatImm<6>(N, 2, DAG)); + case Intrinsic::loongarch_lsx_vclz_b: + case Intrinsic::loongarch_lsx_vclz_h: + case Intrinsic::loongarch_lsx_vclz_w: + case Intrinsic::loongarch_lsx_vclz_d: + case Intrinsic::loongarch_lasx_xvclz_b: + case Intrinsic::loongarch_lasx_xvclz_h: + case Intrinsic::loongarch_lasx_xvclz_w: + case Intrinsic::loongarch_lasx_xvclz_d: + return DAG.getNode(ISD::CTLZ, DL, N->getValueType(0), N->getOperand(1)); + case Intrinsic::loongarch_lsx_vpcnt_b: + case Intrinsic::loongarch_lsx_vpcnt_h: + case Intrinsic::loongarch_lsx_vpcnt_w: + case Intrinsic::loongarch_lsx_vpcnt_d: + case Intrinsic::loongarch_lasx_xvpcnt_b: + case Intrinsic::loongarch_lasx_xvpcnt_h: + case Intrinsic::loongarch_lasx_xvpcnt_w: + case Intrinsic::loongarch_lasx_xvpcnt_d: + return DAG.getNode(ISD::CTPOP, DL, N->getValueType(0), N->getOperand(1)); + case Intrinsic::loongarch_lsx_vbitclr_b: + case Intrinsic::loongarch_lsx_vbitclr_h: + case Intrinsic::loongarch_lsx_vbitclr_w: + case Intrinsic::loongarch_lsx_vbitclr_d: + case Intrinsic::loongarch_lasx_xvbitclr_b: + case Intrinsic::loongarch_lasx_xvbitclr_h: + case Intrinsic::loongarch_lasx_xvbitclr_w: + case Intrinsic::loongarch_lasx_xvbitclr_d: + return lowerVectorBitClear(N, DAG); + case Intrinsic::loongarch_lsx_vbitclri_b: + case Intrinsic::loongarch_lasx_xvbitclri_b: + return lowerVectorBitClearImm<3>(N, DAG); + case Intrinsic::loongarch_lsx_vbitclri_h: + case Intrinsic::loongarch_lasx_xvbitclri_h: + return lowerVectorBitClearImm<4>(N, DAG); + case Intrinsic::loongarch_lsx_vbitclri_w: + case Intrinsic::loongarch_lasx_xvbitclri_w: + return lowerVectorBitClearImm<5>(N, DAG); + case Intrinsic::loongarch_lsx_vbitclri_d: + case Intrinsic::loongarch_lasx_xvbitclri_d: + return lowerVectorBitClearImm<6>(N, DAG); + case Intrinsic::loongarch_lsx_vbitset_b: + case Intrinsic::loongarch_lsx_vbitset_h: + case Intrinsic::loongarch_lsx_vbitset_w: + case Intrinsic::loongarch_lsx_vbitset_d: + case Intrinsic::loongarch_lasx_xvbitset_b: + case Intrinsic::loongarch_lasx_xvbitset_h: + case Intrinsic::loongarch_lasx_xvbitset_w: + case Intrinsic::loongarch_lasx_xvbitset_d: { + EVT VecTy = N->getValueType(0); + SDValue One = DAG.getConstant(1, DL, VecTy); + return DAG.getNode( + ISD::OR, DL, VecTy, N->getOperand(1), + DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG))); + } + case Intrinsic::loongarch_lsx_vbitseti_b: + case Intrinsic::loongarch_lasx_xvbitseti_b: + return lowerVectorBitSetImm<3>(N, DAG); + case 
Intrinsic::loongarch_lsx_vbitseti_h: + case Intrinsic::loongarch_lasx_xvbitseti_h: + return lowerVectorBitSetImm<4>(N, DAG); + case Intrinsic::loongarch_lsx_vbitseti_w: + case Intrinsic::loongarch_lasx_xvbitseti_w: + return lowerVectorBitSetImm<5>(N, DAG); + case Intrinsic::loongarch_lsx_vbitseti_d: + case Intrinsic::loongarch_lasx_xvbitseti_d: + return lowerVectorBitSetImm<6>(N, DAG); + case Intrinsic::loongarch_lsx_vbitrev_b: + case Intrinsic::loongarch_lsx_vbitrev_h: + case Intrinsic::loongarch_lsx_vbitrev_w: + case Intrinsic::loongarch_lsx_vbitrev_d: + case Intrinsic::loongarch_lasx_xvbitrev_b: + case Intrinsic::loongarch_lasx_xvbitrev_h: + case Intrinsic::loongarch_lasx_xvbitrev_w: + case Intrinsic::loongarch_lasx_xvbitrev_d: { + EVT VecTy = N->getValueType(0); + SDValue One = DAG.getConstant(1, DL, VecTy); + return DAG.getNode( + ISD::XOR, DL, VecTy, N->getOperand(1), + DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG))); + } + case Intrinsic::loongarch_lsx_vbitrevi_b: + case Intrinsic::loongarch_lasx_xvbitrevi_b: + return lowerVectorBitRevImm<3>(N, DAG); + case Intrinsic::loongarch_lsx_vbitrevi_h: + case Intrinsic::loongarch_lasx_xvbitrevi_h: + return lowerVectorBitRevImm<4>(N, DAG); + case Intrinsic::loongarch_lsx_vbitrevi_w: + case Intrinsic::loongarch_lasx_xvbitrevi_w: + return lowerVectorBitRevImm<5>(N, DAG); + case Intrinsic::loongarch_lsx_vbitrevi_d: + case Intrinsic::loongarch_lasx_xvbitrevi_d: + return lowerVectorBitRevImm<6>(N, DAG); + case Intrinsic::loongarch_lsx_vfadd_s: + case Intrinsic::loongarch_lsx_vfadd_d: + case Intrinsic::loongarch_lasx_xvfadd_s: + case Intrinsic::loongarch_lasx_xvfadd_d: + return DAG.getNode(ISD::FADD, DL, N->getValueType(0), N->getOperand(1), + N->getOperand(2)); + case Intrinsic::loongarch_lsx_vfsub_s: + case Intrinsic::loongarch_lsx_vfsub_d: + case Intrinsic::loongarch_lasx_xvfsub_s: + case Intrinsic::loongarch_lasx_xvfsub_d: + return DAG.getNode(ISD::FSUB, DL, N->getValueType(0), N->getOperand(1), + N->getOperand(2)); + case Intrinsic::loongarch_lsx_vfmul_s: + case Intrinsic::loongarch_lsx_vfmul_d: + case Intrinsic::loongarch_lasx_xvfmul_s: + case Intrinsic::loongarch_lasx_xvfmul_d: + return DAG.getNode(ISD::FMUL, DL, N->getValueType(0), N->getOperand(1), + N->getOperand(2)); + case Intrinsic::loongarch_lsx_vfdiv_s: + case Intrinsic::loongarch_lsx_vfdiv_d: + case Intrinsic::loongarch_lasx_xvfdiv_s: + case Intrinsic::loongarch_lasx_xvfdiv_d: + return DAG.getNode(ISD::FDIV, DL, N->getValueType(0), N->getOperand(1), + N->getOperand(2)); + case Intrinsic::loongarch_lsx_vfmadd_s: + case Intrinsic::loongarch_lsx_vfmadd_d: + case Intrinsic::loongarch_lasx_xvfmadd_s: + case Intrinsic::loongarch_lasx_xvfmadd_d: + return DAG.getNode(ISD::FMA, DL, N->getValueType(0), N->getOperand(1), + N->getOperand(2), N->getOperand(3)); + case Intrinsic::loongarch_lsx_vinsgr2vr_b: + return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0), + N->getOperand(1), N->getOperand(2), + legalizeIntrinsicImmArg<4>(N, 3, DAG, Subtarget)); + case Intrinsic::loongarch_lsx_vinsgr2vr_h: + case Intrinsic::loongarch_lasx_xvinsgr2vr_w: + return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0), + N->getOperand(1), N->getOperand(2), + legalizeIntrinsicImmArg<3>(N, 3, DAG, Subtarget)); + case Intrinsic::loongarch_lsx_vinsgr2vr_w: + case Intrinsic::loongarch_lasx_xvinsgr2vr_d: + return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0), + N->getOperand(1), N->getOperand(2), + legalizeIntrinsicImmArg<2>(N, 3, DAG, Subtarget)); + 
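+ // Note how the index-width template argument shrinks as the element width
+ // grows, e.g. a v4i32 insert takes a 2-bit lane index while a v2i64 insert
+ // takes a 1-bit one.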
case Intrinsic::loongarch_lsx_vinsgr2vr_d: + return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0), + N->getOperand(1), N->getOperand(2), + legalizeIntrinsicImmArg<1>(N, 3, DAG, Subtarget)); + case Intrinsic::loongarch_lsx_vreplgr2vr_b: + case Intrinsic::loongarch_lsx_vreplgr2vr_h: + case Intrinsic::loongarch_lsx_vreplgr2vr_w: + case Intrinsic::loongarch_lsx_vreplgr2vr_d: + case Intrinsic::loongarch_lasx_xvreplgr2vr_b: + case Intrinsic::loongarch_lasx_xvreplgr2vr_h: + case Intrinsic::loongarch_lasx_xvreplgr2vr_w: + case Intrinsic::loongarch_lasx_xvreplgr2vr_d: { + EVT ResTy = N->getValueType(0); + SmallVector<SDValue> Ops(ResTy.getVectorNumElements(), N->getOperand(1)); + return DAG.getBuildVector(ResTy, DL, Ops); + } + case Intrinsic::loongarch_lsx_vreplve_b: + case Intrinsic::loongarch_lsx_vreplve_h: + case Intrinsic::loongarch_lsx_vreplve_w: + case Intrinsic::loongarch_lsx_vreplve_d: + case Intrinsic::loongarch_lasx_xvreplve_b: + case Intrinsic::loongarch_lasx_xvreplve_h: + case Intrinsic::loongarch_lasx_xvreplve_w: + case Intrinsic::loongarch_lasx_xvreplve_d: + return DAG.getNode(LoongArchISD::VREPLVE, DL, N->getValueType(0), + N->getOperand(1), + DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(), + N->getOperand(2))); + } + return SDValue(); +} + SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; @@ -1612,6 +3947,8 @@ SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N, return performSRLCombine(N, DAG, DCI, Subtarget); case LoongArchISD::BITREV_W: return performBITREV_WCombine(N, DAG, DCI, Subtarget); + case ISD::INTRINSIC_WO_CHAIN: + return performINTRINSIC_WO_CHAINCombine(N, DAG, DCI, Subtarget); } return SDValue(); } @@ -1665,6 +4002,196 @@ static MachineBasicBlock *insertDivByZeroTrap(MachineInstr &MI, return SinkMBB; } +static MachineBasicBlock * +emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB, + const LoongArchSubtarget &Subtarget) { + unsigned CondOpc; + switch (MI.getOpcode()) { + default: + llvm_unreachable("Unexpected opcode"); + case LoongArch::PseudoVBZ: + CondOpc = LoongArch::VSETEQZ_V; + break; + case LoongArch::PseudoVBZ_B: + CondOpc = LoongArch::VSETANYEQZ_B; + break; + case LoongArch::PseudoVBZ_H: + CondOpc = LoongArch::VSETANYEQZ_H; + break; + case LoongArch::PseudoVBZ_W: + CondOpc = LoongArch::VSETANYEQZ_W; + break; + case LoongArch::PseudoVBZ_D: + CondOpc = LoongArch::VSETANYEQZ_D; + break; + case LoongArch::PseudoVBNZ: + CondOpc = LoongArch::VSETNEZ_V; + break; + case LoongArch::PseudoVBNZ_B: + CondOpc = LoongArch::VSETALLNEZ_B; + break; + case LoongArch::PseudoVBNZ_H: + CondOpc = LoongArch::VSETALLNEZ_H; + break; + case LoongArch::PseudoVBNZ_W: + CondOpc = LoongArch::VSETALLNEZ_W; + break; + case LoongArch::PseudoVBNZ_D: + CondOpc = LoongArch::VSETALLNEZ_D; + break; + case LoongArch::PseudoXVBZ: + CondOpc = LoongArch::XVSETEQZ_V; + break; + case LoongArch::PseudoXVBZ_B: + CondOpc = LoongArch::XVSETANYEQZ_B; + break; + case LoongArch::PseudoXVBZ_H: + CondOpc = LoongArch::XVSETANYEQZ_H; + break; + case LoongArch::PseudoXVBZ_W: + CondOpc = LoongArch::XVSETANYEQZ_W; + break; + case LoongArch::PseudoXVBZ_D: + CondOpc = LoongArch::XVSETANYEQZ_D; + break; + case LoongArch::PseudoXVBNZ: + CondOpc = LoongArch::XVSETNEZ_V; + break; + case LoongArch::PseudoXVBNZ_B: + CondOpc = LoongArch::XVSETALLNEZ_B; + break; + case LoongArch::PseudoXVBNZ_H: + CondOpc = LoongArch::XVSETALLNEZ_H; + break; + case LoongArch::PseudoXVBNZ_W: + CondOpc = LoongArch::XVSETALLNEZ_W; +
break; + case LoongArch::PseudoXVBNZ_D: + CondOpc = LoongArch::XVSETALLNEZ_D; + break; + } + + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + DebugLoc DL = MI.getDebugLoc(); + MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); + MachineFunction::iterator It = ++BB->getIterator(); + + MachineFunction *F = BB->getParent(); + MachineBasicBlock *FalseBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *TrueBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *SinkBB = F->CreateMachineBasicBlock(LLVM_BB); + + F->insert(It, FalseBB); + F->insert(It, TrueBB); + F->insert(It, SinkBB); + + // Transfer the remainder of MBB and its successor edges to Sink. + SinkBB->splice(SinkBB->end(), BB, std::next(MI.getIterator()), BB->end()); + SinkBB->transferSuccessorsAndUpdatePHIs(BB); + + // Insert the real instruction to BB. + Register FCC = MRI.createVirtualRegister(&LoongArch::CFRRegClass); + BuildMI(BB, DL, TII->get(CondOpc), FCC).addReg(MI.getOperand(1).getReg()); + + // Insert branch. + BuildMI(BB, DL, TII->get(LoongArch::BCNEZ)).addReg(FCC).addMBB(TrueBB); + BB->addSuccessor(FalseBB); + BB->addSuccessor(TrueBB); + + // FalseBB. + Register RD1 = MRI.createVirtualRegister(&LoongArch::GPRRegClass); + BuildMI(FalseBB, DL, TII->get(LoongArch::ADDI_W), RD1) + .addReg(LoongArch::R0) + .addImm(0); + BuildMI(FalseBB, DL, TII->get(LoongArch::PseudoBR)).addMBB(SinkBB); + FalseBB->addSuccessor(SinkBB); + + // TrueBB. + Register RD2 = MRI.createVirtualRegister(&LoongArch::GPRRegClass); + BuildMI(TrueBB, DL, TII->get(LoongArch::ADDI_W), RD2) + .addReg(LoongArch::R0) + .addImm(1); + TrueBB->addSuccessor(SinkBB); + + // SinkBB: merge the results. + BuildMI(*SinkBB, SinkBB->begin(), DL, TII->get(LoongArch::PHI), + MI.getOperand(0).getReg()) + .addReg(RD1) + .addMBB(FalseBB) + .addReg(RD2) + .addMBB(TrueBB); + + // The pseudo instruction is gone now. + MI.eraseFromParent(); + return SinkBB; +} + +static MachineBasicBlock * +emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB, + const LoongArchSubtarget &Subtarget) { + unsigned InsOp; + unsigned HalfSize; + switch (MI.getOpcode()) { + default: + llvm_unreachable("Unexpected opcode"); + case LoongArch::PseudoXVINSGR2VR_B: + HalfSize = 16; + InsOp = LoongArch::VINSGR2VR_B; + break; + case LoongArch::PseudoXVINSGR2VR_H: + HalfSize = 8; + InsOp = LoongArch::VINSGR2VR_H; + break; + } + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + const TargetRegisterClass *RC = &LoongArch::LASX256RegClass; + const TargetRegisterClass *SubRC = &LoongArch::LSX128RegClass; + DebugLoc DL = MI.getDebugLoc(); + MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); + // XDst = vector_insert XSrc, Elt, Idx + Register XDst = MI.getOperand(0).getReg(); + Register XSrc = MI.getOperand(1).getReg(); + Register Elt = MI.getOperand(2).getReg(); + unsigned Idx = MI.getOperand(3).getImm(); + + Register ScratchReg1 = XSrc; + if (Idx >= HalfSize) { + ScratchReg1 = MRI.createVirtualRegister(RC); + BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), ScratchReg1) + .addReg(XSrc) + .addReg(XSrc) + .addImm(1); + } + + Register ScratchSubReg1 = MRI.createVirtualRegister(SubRC); + Register ScratchSubReg2 = MRI.createVirtualRegister(SubRC); + BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), ScratchSubReg1) + .addReg(ScratchReg1, 0, LoongArch::sub_128); + BuildMI(*BB, MI, DL, TII->get(InsOp), ScratchSubReg2) + .addReg(ScratchSubReg1) + .addReg(Elt) + .addImm(Idx >= HalfSize ? 
Idx - HalfSize : Idx); + + Register ScratchReg2 = XDst; + if (Idx >= HalfSize) + ScratchReg2 = MRI.createVirtualRegister(RC); + + BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), ScratchReg2) + .addImm(0) + .addReg(ScratchSubReg2) + .addImm(LoongArch::sub_128); + + if (Idx >= HalfSize) + BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), XDst) + .addReg(XSrc) + .addReg(ScratchReg2) + .addImm(2); + + MI.eraseFromParent(); + return BB; +} + MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter( MachineInstr &MI, MachineBasicBlock *BB) const { const TargetInstrInfo *TII = Subtarget.getInstrInfo(); @@ -1699,6 +4226,44 @@ MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter( MI.eraseFromParent(); return BB; } + case TargetOpcode::STATEPOINT: + // STATEPOINT is a pseudo instruction which has no implicit defs/uses + // while bl call instruction (where statepoint will be lowered at the + // end) has implicit def. This def is early-clobber as it will be set at + // the moment of the call and earlier than any use is read. + // Add this implicit dead def here as a workaround. + MI.addOperand(*MI.getMF(), + MachineOperand::CreateReg( + LoongArch::R1, /*isDef*/ true, + /*isImp*/ true, /*isKill*/ false, /*isDead*/ true, + /*isUndef*/ false, /*isEarlyClobber*/ true)); + if (!Subtarget.is64Bit()) + report_fatal_error("STATEPOINT is only supported on 64-bit targets"); + return emitPatchPoint(MI, BB); + case LoongArch::PseudoVBZ: + case LoongArch::PseudoVBZ_B: + case LoongArch::PseudoVBZ_H: + case LoongArch::PseudoVBZ_W: + case LoongArch::PseudoVBZ_D: + case LoongArch::PseudoVBNZ: + case LoongArch::PseudoVBNZ_B: + case LoongArch::PseudoVBNZ_H: + case LoongArch::PseudoVBNZ_W: + case LoongArch::PseudoVBNZ_D: + case LoongArch::PseudoXVBZ: + case LoongArch::PseudoXVBZ_B: + case LoongArch::PseudoXVBZ_H: + case LoongArch::PseudoXVBZ_W: + case LoongArch::PseudoXVBZ_D: + case LoongArch::PseudoXVBNZ: + case LoongArch::PseudoXVBNZ_B: + case LoongArch::PseudoXVBNZ_H: + case LoongArch::PseudoXVBNZ_W: + case LoongArch::PseudoXVBNZ_D: + return emitVecCondBranchPseudo(MI, BB, Subtarget); + case LoongArch::PseudoXVINSGR2VR_B: + case LoongArch::PseudoXVINSGR2VR_H: + return emitPseudoXVINSGR2VR(MI, BB, Subtarget); } } @@ -1725,8 +4290,12 @@ const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const { // TODO: Add more target-dependent nodes later. 
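+ // CALL_MEDIUM/CALL_LARGE and TAIL_MEDIUM/TAIL_LARGE are the code model
+ // specific variants of CALL and TAIL selected in LowerCall.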
NODE_NAME_CASE(CALL) + NODE_NAME_CASE(CALL_MEDIUM) + NODE_NAME_CASE(CALL_LARGE) NODE_NAME_CASE(RET) NODE_NAME_CASE(TAIL) + NODE_NAME_CASE(TAIL_MEDIUM) + NODE_NAME_CASE(TAIL_LARGE) NODE_NAME_CASE(SLL_W) NODE_NAME_CASE(SRA_W) NODE_NAME_CASE(SRL_W) @@ -1771,6 +4340,23 @@ const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(MOVFCSR2GR) NODE_NAME_CASE(CACOP_D) NODE_NAME_CASE(CACOP_W) + NODE_NAME_CASE(VSHUF) + NODE_NAME_CASE(VPICKEV) + NODE_NAME_CASE(VPICKOD) + NODE_NAME_CASE(VPACKEV) + NODE_NAME_CASE(VPACKOD) + NODE_NAME_CASE(VILVL) + NODE_NAME_CASE(VILVH) + NODE_NAME_CASE(VSHUF4I) + NODE_NAME_CASE(VREPLVEI) + NODE_NAME_CASE(XVPERMI) + NODE_NAME_CASE(VPICK_SEXT_ELT) + NODE_NAME_CASE(VPICK_ZEXT_ELT) + NODE_NAME_CASE(VREPLVE) + NODE_NAME_CASE(VALL_ZERO) + NODE_NAME_CASE(VANY_ZERO) + NODE_NAME_CASE(VALL_NONZERO) + NODE_NAME_CASE(VANY_NONZERO) } #undef NODE_NAME_CASE return nullptr; @@ -1780,6 +4366,12 @@ const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const { // Calling Convention Implementation //===----------------------------------------------------------------------===// +#ifdef ARK_GC_SUPPORT +// TableGen provides definitions of the calling convention analysis entry +// points. +#include "LoongArchGenCallingConv.inc" +#endif + // Eight general-purpose registers a0-a7 used for passing integer arguments, // with a0-a1 reused to return values. Generally, the GPRs are used to pass // fixed-point arguments, and floating-point arguments when no FPR is available @@ -1797,6 +4389,14 @@ const MCPhysReg ArgFPR64s[] = { LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, LoongArch::F3_64, LoongArch::F4_64, LoongArch::F5_64, LoongArch::F6_64, LoongArch::F7_64}; +const MCPhysReg ArgVRs[] = {LoongArch::VR0, LoongArch::VR1, LoongArch::VR2, + LoongArch::VR3, LoongArch::VR4, LoongArch::VR5, + LoongArch::VR6, LoongArch::VR7}; + +const MCPhysReg ArgXRs[] = {LoongArch::XR0, LoongArch::XR1, LoongArch::XR2, + LoongArch::XR3, LoongArch::XR4, LoongArch::XR5, + LoongArch::XR6, LoongArch::XR7}; + // Pass a 2*GRLen argument that has been split into two GRLen values through // registers or the stack as necessary. 
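+ // For example, on LA64 (GRLen=64) an i128 argument is split into two i64
+ // halves: both halves are passed in GPRs when two remain, otherwise the
+ // value (or its second half) is passed on the stack.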
static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State, @@ -1943,6 +4543,10 @@ static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI, Reg = State.AllocateReg(ArgFPR32s); else if (ValVT == MVT::f64 && !UseGPRForFloat) Reg = State.AllocateReg(ArgFPR64s); + else if (ValVT.is128BitVector()) + Reg = State.AllocateReg(ArgVRs); + else if (ValVT.is256BitVector()) + Reg = State.AllocateReg(ArgXRs); else Reg = State.AllocateReg(ArgGPRs); @@ -2109,11 +4713,20 @@ static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State) { if (LocVT == MVT::i32 || LocVT == MVT::i64) { +#ifdef ARK_GC_SUPPORT + // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, SpLim + // s0 fp s1 s2 s3 s4 s5 s6 s7 s8 + static const MCPhysReg GPRList[] = { + LoongArch::R23, LoongArch::R22, LoongArch::R24, LoongArch::R25, + LoongArch::R26, LoongArch::R27, LoongArch::R28, LoongArch::R29, + LoongArch::R30, LoongArch::R31}; +#else // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, SpLim // s0 s1 s2 s3 s4 s5 s6 s7 s8 static const MCPhysReg GPRList[] = { LoongArch::R23, LoongArch::R24, LoongArch::R25, LoongArch::R26, LoongArch::R27, LoongArch::R28, LoongArch::R29, LoongArch::R30, LoongArch::R31}; +#endif if (unsigned Reg = State.AllocateReg(GPRList)) { State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); return false; @@ -2165,6 +4778,15 @@ SDValue LoongArchTargetLowering::LowerFormalArguments( !MF.getSubtarget().getFeatureBits()[LoongArch::FeatureBasicD]) report_fatal_error( "GHC calling convention requires the F and D extensions"); + break; +#ifdef ARK_GC_SUPPORT + case CallingConv::WebKit_JS: + if (!MF.getSubtarget().getFeatureBits()[LoongArch::FeatureBasicF] || + !MF.getSubtarget().getFeatureBits()[LoongArch::FeatureBasicD]) + report_fatal_error( + "WebKit_JS calling convention requires the F and D extensions"); + break; +#endif } EVT PtrVT = getPointerTy(DAG.getDataLayout()); @@ -2179,6 +4801,10 @@ SDValue LoongArchTargetLowering::LowerFormalArguments( if (CallConv == CallingConv::GHC) CCInfo.AnalyzeFormalArguments(Ins, CC_LoongArch_GHC); +#ifdef ARK_GC_SUPPORT + else if (CallConv == CallingConv::WebKit_JS) + CCInfo.AnalyzeFormalArguments(Ins, CC_LoongArch_WebKit_JS); +#endif else analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false, CC_LoongArch); @@ -2354,6 +4980,10 @@ LoongArchTargetLowering::LowerCall(CallLoweringInfo &CLI, if (CallConv == CallingConv::GHC) ArgCCInfo.AnalyzeCallOperands(Outs, CC_LoongArch_GHC); +#ifdef ARK_GC_SUPPORT + else if (CallConv == CallingConv::WebKit_JS) + ArgCCInfo.AnalyzeCallOperands(Outs, CC_LoongArch_WebKit_JS); +#endif else analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI, CC_LoongArch); @@ -2531,13 +5161,31 @@ LoongArchTargetLowering::LowerCall(CallLoweringInfo &CLI, // Emit the call. SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + unsigned Op; + switch (DAG.getTarget().getCodeModel()) { + default: + report_fatal_error("Unsupported code model"); + case CodeModel::Small: + Op = IsTailCall ? LoongArchISD::TAIL : LoongArchISD::CALL; + break; + case CodeModel::Medium: + assert(Subtarget.is64Bit() && "Medium code model requires LA64"); + Op = IsTailCall ? LoongArchISD::TAIL_MEDIUM : LoongArchISD::CALL_MEDIUM; + break; + case CodeModel::Large: + assert(Subtarget.is64Bit() && "Large code model requires LA64"); + Op = IsTailCall ? 
LoongArchISD::TAIL_LARGE : LoongArchISD::CALL_LARGE; + break; + } if (IsTailCall) { MF.getFrameInfo().setHasTailCall(); - return DAG.getNode(LoongArchISD::TAIL, DL, NodeTys, Ops); + SDValue Ret = DAG.getNode(Op, DL, NodeTys, Ops); + DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge); + return Ret; } - Chain = DAG.getNode(LoongArchISD::CALL, DL, NodeTys, Ops); + Chain = DAG.getNode(Op, DL, NodeTys, Ops); DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge); Glue = Chain.getValue(1); @@ -2549,7 +5197,12 @@ LoongArchTargetLowering::LowerCall(CallLoweringInfo &CLI, // Assign locations to each value returned by this call. SmallVector<CCValAssign> RVLocs; CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext()); - analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_LoongArch); +#ifdef ARK_GC_SUPPORT + if (CallConv == CallingConv::WebKit_JS) + RetCCInfo.AnalyzeCallResult(Ins, RetCC_LoongArch_WebKit_JS); + else +#endif + analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_LoongArch); // Copy all of the result registers out of their specified physreg. for (auto &VA : RVLocs) { @@ -2574,6 +5227,11 @@ bool LoongArchTargetLowering::CanLowerReturn( SmallVector<CCValAssign> RVLocs; CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context); +#ifdef ARK_GC_SUPPORT + if (CallConv == CallingConv::WebKit_JS) + return CCInfo.CheckReturn(Outs, RetCC_LoongArch_WebKit_JS); +#endif + for (unsigned i = 0, e = Outs.size(); i != e; ++i) { LoongArchABI::ABI ABI = MF.getSubtarget<LoongArchSubtarget>().getTargetABI(); @@ -2597,10 +5255,15 @@ SDValue LoongArchTargetLowering::LowerReturn( CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs, *DAG.getContext()); - analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true, - nullptr, CC_LoongArch); if (CallConv == CallingConv::GHC && !RVLocs.empty()) report_fatal_error("GHC functions return void only"); +#ifdef ARK_GC_SUPPORT + else if (CallConv == CallingConv::WebKit_JS) + CCInfo.AnalyzeReturn(Outs, RetCC_LoongArch_WebKit_JS); +#endif + else + analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true, + nullptr, CC_LoongArch); SDValue Glue; SmallVector<SDValue> RetOps(1, Chain); @@ -2655,8 +5318,9 @@ bool LoongArchTargetLowering::shouldInsertFencesForAtomic( // On LA64, atomic store operations with IntegerBitWidth of 32 and 64 do not // require fences because we can use amswap_db.[w/d]. - if (isa<StoreInst>(I)) { - unsigned Size = I->getOperand(0)->getType()->getIntegerBitWidth(); + Type *Ty = I->getOperand(0)->getType(); + if (isa<StoreInst>(I) && Ty->isIntegerTy()) { + unsigned Size = Ty->getIntegerBitWidth(); return (Size == 8 || Size == 16); } @@ -2932,6 +5596,12 @@ LoongArchTargetLowering::getRegForInlineAsmConstraint( return std::make_pair(0U, &LoongArch::FPR32RegClass); if (Subtarget.hasBasicD() && VT == MVT::f64) return std::make_pair(0U, &LoongArch::FPR64RegClass); + if (Subtarget.hasExtLSX() && + TRI->isTypeLegalForClass(LoongArch::LSX128RegClass, VT)) + return std::make_pair(0U, &LoongArch::LSX128RegClass); + if (Subtarget.hasExtLASX() && + TRI->isTypeLegalForClass(LoongArch::LASX256RegClass, VT)) + return std::make_pair(0U, &LoongArch::LASX256RegClass); break; default: break; @@ -2949,7 +5619,8 @@ LoongArchTargetLowering::getRegForInlineAsmConstraint( // decode the usage of register name aliases into their official names. And // AFAIK, the not yet upstreamed `rustc` for LoongArch will always use // official register names.
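+ // The `{$vr}` and `{$xr}` spellings accepted below extend the same
+ // alias-decoding scheme to the LSX and LASX register files.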
- if (Constraint.startswith("{$r") || Constraint.startswith("{$f")) { + if (Constraint.startswith("{$r") || Constraint.startswith("{$f") || + Constraint.startswith("{$vr") || Constraint.startswith("{$xr")) { bool IsFP = Constraint[2] == 'f'; std::pair<StringRef, StringRef> Temp = Constraint.split('$'); std::pair<unsigned, const TargetRegisterClass *> R; @@ -3059,3 +5730,8 @@ bool LoongArchTargetLowering::decomposeMulByConstant(LLVMContext &Context, return false; } + +ISD::NodeType LoongArchTargetLowering::getExtendForAtomicCmpSwapArg() const { + // TODO: LAMCAS will use amcas{_DB,}.[bhwd] which does not require extension. + return ISD::SIGN_EXTEND; +} diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h index f6a2b2dfce2220462ae967d66fef98e3ea2a54f3..2a2e40daa7c0f9699cae7a51265853f29198eb10 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h @@ -28,8 +28,12 @@ enum NodeType : unsigned { // TODO: add more LoongArchISDs CALL, + CALL_MEDIUM, + CALL_LARGE, RET, TAIL, + TAIL_MEDIUM, + TAIL_LARGE, // 32-bit shifts, directly matching the semantics of the named LoongArch // instructions. @@ -110,6 +114,30 @@ enum NodeType : unsigned { // Read CPU configuration information operation CPUCFG, + + // Vector Shuffle + VREPLVE, + VSHUF, + VPICKEV, + VPICKOD, + VPACKEV, + VPACKOD, + VILVL, + VILVH, + VSHUF4I, + VREPLVEI, + XVPERMI, + + // Extended vector element extraction + VPICK_SEXT_ELT, + VPICK_ZEXT_ELT, + + // Vector comparisons + VALL_ZERO, + VANY_ZERO, + VALL_NONZERO, + VANY_NONZERO, + // Intrinsic operations end ============================================= }; } // end namespace LoongArchISD @@ -189,6 +217,8 @@ public: return ISD::SIGN_EXTEND; } + ISD::NodeType getExtendForAtomicCmpSwapArg() const override; + Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override; bool mayBeEmittedAsTailCall(const CallInst *CI) const override; @@ -201,6 +231,10 @@ public: MachineMemOperand::Flags Flags = MachineMemOperand::MONone, bool *Fast = nullptr) const override; + bool isShuffleMaskLegal(ArrayRef<int> Mask, EVT VT) const override { + return false; + } + private: /// Target-specific function used to lower LoongArch calling conventions.
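+ /// These functions return true on failure (the usual CCAssignFn
+ /// convention), which callers such as analyzeInputArgs treat as fatal.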
typedef bool LoongArchCCAssignFn(const DataLayout &DL, LoongArchABI::ABI ABI, @@ -218,11 +252,12 @@ private: LoongArchCCAssignFn Fn) const; template - SDValue getAddr(NodeTy *N, SelectionDAG &DAG, bool IsLocal = true) const; + SDValue getAddr(NodeTy *N, SelectionDAG &DAG, CodeModel::Model M, + bool IsLocal = true) const; SDValue getStaticTLSAddr(GlobalAddressSDNode *N, SelectionDAG &DAG, - unsigned Opc) const; + unsigned Opc, bool Large = false) const; SDValue getDynamicTLSAddr(GlobalAddressSDNode *N, SelectionDAG &DAG, - unsigned Opc) const; + unsigned Opc, bool Large = false) const; SDValue lowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; SDValue lowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; SDValue lowerJumpTable(SDValue Op, SelectionDAG &DAG) const; @@ -246,6 +281,10 @@ private: SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; SDValue lowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override; diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrFormats.td b/llvm/lib/Target/LoongArch/LoongArchInstrFormats.td index bebc83a861ae591b7e61c19b011fa9248f66ec49..9288cabd17ec41c752082a478eb2046a0120cf8c 100644 --- a/llvm/lib/Target/LoongArch/LoongArchInstrFormats.td +++ b/llvm/lib/Target/LoongArch/LoongArchInstrFormats.td @@ -42,11 +42,15 @@ class Pseudo pattern = [], string opcstr = "", let isCodeGenOnly = 1; } +class deriveInsnMnemonic { + string ret = !tolower(!subst("@", "_", !subst("_", ".", !subst("__", "@", name)))); +} + // 2R-type // -class Fmt2R op, dag outs, dag ins, string opcstr, string opnstr, +class Fmt2R op, dag outs, dag ins, string opnstr, list pattern = []> - : LAInst { + : LAInst.ret, opnstr, pattern> { bits<5> rj; bits<5> rd; @@ -57,9 +61,9 @@ class Fmt2R op, dag outs, dag ins, string opcstr, string opnstr, // 3R-type // -class Fmt3R op, dag outs, dag ins, string opcstr, string opnstr, +class Fmt3R op, dag outs, dag ins, string opnstr, list pattern = []> - : LAInst { + : LAInst.ret, opnstr, pattern> { bits<5> rk; bits<5> rj; bits<5> rd; @@ -72,9 +76,9 @@ class Fmt3R op, dag outs, dag ins, string opcstr, string opnstr, // 3RI2-type // -class Fmt3RI2 op, dag outs, dag ins, string opcstr, string opnstr, +class Fmt3RI2 op, dag outs, dag ins, string opnstr, list pattern = []> - : LAInst { + : LAInst.ret, opnstr, pattern> { bits<2> imm2; bits<5> rk; bits<5> rj; @@ -89,9 +93,9 @@ class Fmt3RI2 op, dag outs, dag ins, string opcstr, string opnstr, // 3RI3-type // -class Fmt3RI3 op, dag outs, dag ins, string opcstr, string opnstr, +class Fmt3RI3 op, dag outs, dag ins, string opnstr, list pattern = []> - : LAInst { + : LAInst.ret, opnstr, pattern> { bits<3> imm3; bits<5> rk; bits<5> rj; @@ -106,9 +110,9 @@ class Fmt3RI3 op, dag outs, dag ins, string opcstr, string opnstr, // 2RI5-type // -class Fmt2RI5 op, dag outs, dag ins, string opcstr, string opnstr, +class Fmt2RI5 op, dag outs, dag ins, string opnstr, list pattern = []> - : LAInst { + : LAInst.ret, opnstr, pattern> { bits<5> imm5; bits<5> rj; bits<5> rd; @@ -121,9 +125,9 @@ class Fmt2RI5 op, dag outs, dag ins, string opcstr, string opnstr, // 2RI6-type // -class Fmt2RI6 op, dag outs, dag 
ins, string opcstr, string opnstr, +class Fmt2RI6 op, dag outs, dag ins, string opnstr, list pattern = []> - : LAInst { + : LAInst.ret, opnstr, pattern> { bits<6> imm6; bits<5> rj; bits<5> rd; @@ -136,9 +140,9 @@ class Fmt2RI6 op, dag outs, dag ins, string opcstr, string opnstr, // 2RI8-type // -class Fmt2RI8 op, dag outs, dag ins, string opcstr, string opnstr, +class Fmt2RI8 op, dag outs, dag ins, string opnstr, list pattern = []> - : LAInst { + : LAInst.ret, opnstr, pattern> { bits<8> imm8; bits<5> rj; bits<5> rd; @@ -151,9 +155,9 @@ class Fmt2RI8 op, dag outs, dag ins, string opcstr, string opnstr, // 2RI12-type // -class Fmt2RI12 op, dag outs, dag ins, string opcstr, string opnstr, +class Fmt2RI12 op, dag outs, dag ins, string opnstr, list pattern = []> - : LAInst { + : LAInst.ret, opnstr, pattern> { bits<12> imm12; bits<5> rj; bits<5> rd; @@ -166,9 +170,9 @@ class Fmt2RI12 op, dag outs, dag ins, string opcstr, string opnstr, // 2RI14-type // -class Fmt2RI14 op, dag outs, dag ins, string opcstr, string opnstr, +class Fmt2RI14 op, dag outs, dag ins, string opnstr, list pattern = []> - : LAInst { + : LAInst.ret, opnstr, pattern> { bits<14> imm14; bits<5> rj; bits<5> rd; @@ -181,9 +185,9 @@ class Fmt2RI14 op, dag outs, dag ins, string opcstr, string opnstr, // 2RI16-type // -class Fmt2RI16 op, dag outs, dag ins, string opcstr, string opnstr, +class Fmt2RI16 op, dag outs, dag ins, string opnstr, list pattern = []> - : LAInst { + : LAInst.ret, opnstr, pattern> { bits<16> imm16; bits<5> rj; bits<5> rd; @@ -196,9 +200,9 @@ class Fmt2RI16 op, dag outs, dag ins, string opcstr, string opnstr, // 1RI20-type // -class Fmt1RI20 op, dag outs, dag ins, string opcstr, string opnstr, +class Fmt1RI20 op, dag outs, dag ins, string opnstr, list pattern = []> - : LAInst { + : LAInst.ret, opnstr, pattern> { bits<20> imm20; bits<5> rd; @@ -209,9 +213,9 @@ class Fmt1RI20 op, dag outs, dag ins, string opcstr, string opnstr, // 1RI21-type // -class Fmt1RI21 op, dag outs, dag ins, string opcstr, string opnstr, +class Fmt1RI21 op, dag outs, dag ins, string opnstr, list pattern = []> - : LAInst { + : LAInst.ret, opnstr, pattern> { bits<21> imm21; bits<5> rj; @@ -223,9 +227,9 @@ class Fmt1RI21 op, dag outs, dag ins, string opcstr, string opnstr, // I15-type // -class FmtI15 op, dag outs, dag ins, string opcstr, string opnstr, +class FmtI15 op, dag outs, dag ins, string opnstr, list pattern = []> - : LAInst { + : LAInst.ret, opnstr, pattern> { bits<15> imm15; let Inst{31-15} = op; @@ -234,9 +238,9 @@ class FmtI15 op, dag outs, dag ins, string opcstr, string opnstr, // I26-type // -class FmtI26 op, dag outs, dag ins, string opcstr, string opnstr, +class FmtI26 op, dag outs, dag ins, string opnstr, list pattern = []> - : LAInst { + : LAInst.ret, opnstr, pattern> { bits<26> imm26; let Inst{31-26} = op; @@ -246,9 +250,9 @@ class FmtI26 op, dag outs, dag ins, string opcstr, string opnstr, // FmtBSTR_W // -class FmtBSTR_W op, dag outs, dag ins, string opcstr, string opnstr, +class FmtBSTR_W op, dag outs, dag ins, string opnstr, list pattern = []> - : LAInst { + : LAInst.ret, opnstr, pattern> { bits<5> msbw; bits<5> lsbw; bits<5> rj; @@ -264,9 +268,9 @@ class FmtBSTR_W op, dag outs, dag ins, string opcstr, string opnstr, // FmtBSTR_D // -class FmtBSTR_D op, dag outs, dag ins, string opcstr, string opnstr, +class FmtBSTR_D op, dag outs, dag ins, string opnstr, list pattern = []> - : LAInst { + : LAInst.ret, opnstr, pattern> { bits<6> msbd; bits<6> lsbd; bits<5> rj; @@ -281,9 +285,9 @@ class FmtBSTR_D op, dag outs, dag ins, 
string opcstr, string opnstr, // FmtASRT // -class FmtASRT op, dag outs, dag ins, string opcstr, string opnstr, +class FmtASRT op, dag outs, dag ins, string opnstr, list pattern = []> - : LAInst { + : LAInst.ret, opnstr, pattern> { bits<5> rk; bits<5> rj; @@ -295,9 +299,8 @@ class FmtASRT op, dag outs, dag ins, string opcstr, string opnstr, // FmtPRELD // < 0b0010101011 | I12 | rj | I5> -class FmtPRELD pattern = []> - : LAInst { +class FmtPRELD pattern = []> + : LAInst.ret, opnstr, pattern> { bits<12> imm12; bits<5> rj; bits<5> imm5; @@ -310,9 +313,8 @@ class FmtPRELD -class FmtPRELDX pattern = []> - : LAInst { +class FmtPRELDX pattern = []> + : LAInst.ret, opnstr, pattern> { bits<5> rk; bits<5> rj; bits<5> imm5; @@ -325,9 +327,9 @@ class FmtPRELDX -class FmtCSR op, dag outs, dag ins, string opcstr, string opnstr, +class FmtCSR op, dag outs, dag ins, string opnstr, list pattern = []> - : LAInst { + : LAInst.ret, opnstr, pattern> { bits<14> csr_num; bits<5> rd; @@ -339,9 +341,9 @@ class FmtCSR op, dag outs, dag ins, string opcstr, string opnstr, // FmtCSRXCHG // -class FmtCSRXCHG op, dag outs, dag ins, string opcstr, string opnstr, +class FmtCSRXCHG op, dag outs, dag ins, string opnstr, list pattern = []> - : LAInst { + : LAInst.ret, opnstr, pattern> { bits<14> csr_num; bits<5> rj; bits<5> rd; @@ -354,9 +356,8 @@ class FmtCSRXCHG op, dag outs, dag ins, string opcstr, string opnstr, // FmtCACOP // <0b0000011000 | I12 | rj | I5> -class FmtCACOP pattern = []> - : LAInst { +class FmtCACOP pattern = []> + : LAInst.ret, opnstr, pattern> { bits<12> imm12; bits<5> rj; bits<5> op; @@ -369,16 +370,15 @@ class FmtCACOP -class FmtI32 op, string opstr, list pattern = []> - : LAInst<(outs), (ins), opstr, "", pattern> { +class FmtI32 op, list pattern = []> + : LAInst<(outs), (ins), deriveInsnMnemonic.ret, "", pattern> { let Inst{31-0} = op; } // FmtINVTLB // <0b00000110010010011 | rk | rj | I5> -class FmtINVTLB pattern = []> - : LAInst { +class FmtINVTLB pattern = []> + : LAInst.ret, opnstr, pattern> { bits<5> rk; bits<5> rj; bits<5> op; @@ -391,9 +391,8 @@ class FmtINVTLB -class FmtLDPTE pattern = []> - : LAInst { +class FmtLDPTE pattern = []> + : LAInst.ret, opnstr, pattern> { bits<8> seq; bits<5> rj; diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp index 0145e3c765875bdf8e34a55709c548c71b0de9f3..ffd8824bccba47b526363ab8a251df6beceda8a2 100644 --- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp @@ -17,6 +17,7 @@ #include "MCTargetDesc/LoongArchMCTargetDesc.h" #include "MCTargetDesc/LoongArchMatInt.h" #include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/CodeGen/StackMaps.h" #include "llvm/MC/MCInstBuilder.h" using namespace llvm; @@ -47,6 +48,22 @@ void LoongArchInstrInfo::copyPhysReg(MachineBasicBlock &MBB, return; } + // VR->VR copies. + if (LoongArch::LSX128RegClass.contains(DstReg, SrcReg)) { + BuildMI(MBB, MBBI, DL, get(LoongArch::VORI_B), DstReg) + .addReg(SrcReg, getKillRegState(KillSrc)) + .addImm(0); + return; + } + + // XR->XR copies. + if (LoongArch::LASX256RegClass.contains(DstReg, SrcReg)) { + BuildMI(MBB, MBBI, DL, get(LoongArch::XVORI_B), DstReg) + .addReg(SrcReg, getKillRegState(KillSrc)) + .addImm(0); + return; + } + // GPR->CFR copy. 
if (LoongArch::CFRRegClass.contains(DstReg) && LoongArch::GPRRegClass.contains(SrcReg)) { @@ -68,6 +85,14 @@ void LoongArchInstrInfo::copyPhysReg(MachineBasicBlock &MBB, Opc = LoongArch::FMOV_S; } else if (LoongArch::FPR64RegClass.contains(DstReg, SrcReg)) { Opc = LoongArch::FMOV_D; + } else if (LoongArch::GPRRegClass.contains(DstReg) && + LoongArch::FPR32RegClass.contains(SrcReg)) { + // FPR32 -> GPR copies + Opc = LoongArch::MOVFR2GR_S; + } else if (LoongArch::GPRRegClass.contains(DstReg) && + LoongArch::FPR64RegClass.contains(SrcReg)) { + // FPR64 -> GPR copies + Opc = LoongArch::MOVFR2GR_D; } else { // TODO: support other copies. llvm_unreachable("Impossible reg-to-reg copy"); @@ -96,6 +121,10 @@ void LoongArchInstrInfo::storeRegToStackSlot( Opcode = LoongArch::FST_S; else if (LoongArch::FPR64RegClass.hasSubClassEq(RC)) Opcode = LoongArch::FST_D; + else if (LoongArch::LSX128RegClass.hasSubClassEq(RC)) + Opcode = LoongArch::VST; + else if (LoongArch::LASX256RegClass.hasSubClassEq(RC)) + Opcode = LoongArch::XVST; else if (LoongArch::CFRRegClass.hasSubClassEq(RC)) Opcode = LoongArch::PseudoST_CFR; else @@ -131,6 +160,10 @@ void LoongArchInstrInfo::loadRegFromStackSlot( Opcode = LoongArch::FLD_S; else if (LoongArch::FPR64RegClass.hasSubClassEq(RC)) Opcode = LoongArch::FLD_D; + else if (LoongArch::LSX128RegClass.hasSubClassEq(RC)) + Opcode = LoongArch::VLD; + else if (LoongArch::LASX256RegClass.hasSubClassEq(RC)) + Opcode = LoongArch::XVLD; else if (LoongArch::CFRRegClass.hasSubClassEq(RC)) Opcode = LoongArch::PseudoLD_CFR; else @@ -192,7 +225,25 @@ unsigned LoongArchInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const { const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo(); return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI); } - return MI.getDesc().getSize(); + + unsigned NumBytes = 0; + const MCInstrDesc &Desc = MI.getDesc(); + + // Size should be preferably set in + // llvm/lib/Target/LoongArch/LoongArch*InstrInfo.td (default case). + // Specific cases handle instructions of variable sizes. + switch (Desc.getOpcode()) { + default: + return Desc.getSize(); + case TargetOpcode::STATEPOINT: + NumBytes = StatepointOpers(&MI).getNumPatchBytes(); + assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!"); + // No patch bytes means a normal call inst (i.e. `bl`) is emitted. 
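+ // (Every LoongArch instruction, `bl` included, is 4 bytes wide.)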
+ if (NumBytes == 0) + NumBytes = 4; + break; + } + return NumBytes; } MachineBasicBlock * @@ -481,12 +532,20 @@ LoongArchInstrInfo::getSerializableDirectMachineOperandTargetFlags() const { {MO_CALL_PLT, "loongarch-call-plt"}, {MO_PCREL_HI, "loongarch-pcrel-hi"}, {MO_PCREL_LO, "loongarch-pcrel-lo"}, + {MO_PCREL64_LO, "loongarch-pcrel64-lo"}, + {MO_PCREL64_HI, "loongarch-pcrel64-hi"}, {MO_GOT_PC_HI, "loongarch-got-pc-hi"}, {MO_GOT_PC_LO, "loongarch-got-pc-lo"}, + {MO_GOT_PC64_LO, "loongarch-got-pc64-lo"}, + {MO_GOT_PC64_HI, "loongarch-got-pc64-hi"}, {MO_LE_HI, "loongarch-le-hi"}, {MO_LE_LO, "loongarch-le-lo"}, + {MO_LE64_LO, "loongarch-le64-lo"}, + {MO_LE64_HI, "loongarch-le64-hi"}, {MO_IE_PC_HI, "loongarch-ie-pc-hi"}, {MO_IE_PC_LO, "loongarch-ie-pc-lo"}, + {MO_IE_PC64_LO, "loongarch-ie-pc64-lo"}, + {MO_IE_PC64_HI, "loongarch-ie-pc64-hi"}, {MO_LD_PC_HI, "loongarch-ld-pc-hi"}, {MO_GD_PC_HI, "loongarch-gd-pc-hi"}}; return makeArrayRef(TargetFlags); diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td index f20beee9224b104181eff14c4a223101df0f1191..d4c2e8694d4a2de707934a4eb5c0871c224fbbe0 100644 --- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td @@ -69,6 +69,18 @@ def loongarch_ret : SDNode<"LoongArchISD::RET", SDTNone, def loongarch_tail : SDNode<"LoongArchISD::TAIL", SDT_LoongArchCall, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; +def loongarch_call_medium : SDNode<"LoongArchISD::CALL_MEDIUM", SDT_LoongArchCall, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, + SDNPVariadic]>; +def loongarch_tail_medium : SDNode<"LoongArchISD::TAIL_MEDIUM", SDT_LoongArchCall, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, + SDNPVariadic]>; +def loongarch_call_large : SDNode<"LoongArchISD::CALL_LARGE", SDT_LoongArchCall, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, + SDNPVariadic]>; +def loongarch_tail_large : SDNode<"LoongArchISD::TAIL_LARGE", SDT_LoongArchCall, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, + SDNPVariadic]>; def loongarch_sll_w : SDNode<"LoongArchISD::SLL_W", SDT_LoongArchIntBinOpW>; def loongarch_sra_w : SDNode<"LoongArchISD::SRA_W", SDT_LoongArchIntBinOpW>; def loongarch_srl_w : SDNode<"LoongArchISD::SRL_W", SDT_LoongArchIntBinOpW>; @@ -181,6 +193,10 @@ def imm32 : Operand { let ParserMatchClass = ImmAsmOperand<"", 32, "">; } +def uimm1 : Operand, ImmLeaf(Imm);}]>{ + let ParserMatchClass = UImmAsmOperand<1>; +} + def uimm2 : Operand, ImmLeaf(Imm);}]> { let ParserMatchClass = UImmAsmOperand<2>; } @@ -192,10 +208,14 @@ def uimm2_plus1 : Operand, let DecoderMethod = "decodeUImmOperand<2, 1>"; } -def uimm3 : Operand { +def uimm3 : Operand, ImmLeaf(Imm);}]> { let ParserMatchClass = UImmAsmOperand<3>; } +def uimm4 : Operand, ImmLeaf(Imm);}]> { + let ParserMatchClass = UImmAsmOperand<4>; +} + def uimm5 : Operand, ImmLeaf(Imm);}]> { let ParserMatchClass = UImmAsmOperand<5>; } @@ -204,6 +224,10 @@ def uimm6 : Operand, ImmLeaf(Imm);}]> { let ParserMatchClass = UImmAsmOperand<6>; } +def uimm7 : Operand { + let ParserMatchClass = UImmAsmOperand<7>; +} + def uimm8 : Operand, ImmLeaf(Imm);}]> { let ParserMatchClass = UImmAsmOperand<8>; } @@ -231,6 +255,46 @@ def uimm15 : Operand, let ParserMatchClass = UImmAsmOperand<15>; } +def simm5 : Operand { + let ParserMatchClass = SImmAsmOperand<5>; + let DecoderMethod = "decodeSImmOperand<5>"; +} + +def simm8 : Operand { + let ParserMatchClass = SImmAsmOperand<8>; + let DecoderMethod = "decodeSImmOperand<8>"; +} + +foreach I = [1, 2, 3] in { 
+def simm8_lsl # I : Operand<GRLenVT> {
+  let ParserMatchClass = SImmAsmOperand<8, "lsl" # I>;
+  let EncoderMethod = "getImmOpValueAsr<" # I # ">";
+  let DecoderMethod = "decodeSImmOperand<8," # I # ">";
+}
+}
+
+def simm9_lsl3 : Operand<GRLenVT> {
+  let ParserMatchClass = SImmAsmOperand<9, "lsl3">;
+  let EncoderMethod = "getImmOpValueAsr<3>";
+  let DecoderMethod = "decodeSImmOperand<9, 3>";
+}
+
+def simm10 : Operand<GRLenVT> {
+  let ParserMatchClass = SImmAsmOperand<10>;
+}
+
+def simm10_lsl2 : Operand<GRLenVT> {
+  let ParserMatchClass = SImmAsmOperand<10, "lsl2">;
+  let EncoderMethod = "getImmOpValueAsr<2>";
+  let DecoderMethod = "decodeSImmOperand<10, 2>";
+}
+
+def simm11_lsl1 : Operand<GRLenVT> {
+  let ParserMatchClass = SImmAsmOperand<11, "lsl1">;
+  let EncoderMethod = "getImmOpValueAsr<1>";
+  let DecoderMethod = "decodeSImmOperand<11, 1>";
+}
+
 class SImm12Operand : Operand<GRLenVT>,
                       ImmLeaf<GRLenVT, [{return isInt<12>(Imm);}]> {
   let DecoderMethod = "decodeSImmOperand<12>";
@@ -248,10 +312,15 @@ def simm12_lu52id : SImm12Operand {
   let ParserMatchClass = SImmAsmOperand<12, "lu52id">;
 }
 
+def simm13 : Operand<GRLenVT> {
+  let ParserMatchClass = SImmAsmOperand<13>;
+  let DecoderMethod = "decodeSImmOperand<13>";
+}
+
 def simm14_lsl2 : Operand<GRLenVT>,
                   ImmLeaf<GRLenVT, [{return isShiftedInt<14,2>(Imm);}]> {
   let ParserMatchClass = SImmAsmOperand<14, "lsl2">;
-  let EncoderMethod = "getImmOpValueAsr2";
+  let EncoderMethod = "getImmOpValueAsr<2>";
   let DecoderMethod = "decodeSImmOperand<14, 2>";
 }
 
@@ -263,13 +332,13 @@ def simm16 : Operand<GRLenVT> {
 def simm16_lsl2 : Operand<GRLenVT>,
                   ImmLeaf<GRLenVT, [{return isInt<16>(Imm>>2);}]> {
   let ParserMatchClass = SImmAsmOperand<16, "lsl2">;
-  let EncoderMethod = "getImmOpValueAsr2";
+  let EncoderMethod = "getImmOpValueAsr<2>";
   let DecoderMethod = "decodeSImmOperand<16, 2>";
 }
 
 def simm16_lsl2_br : Operand<OtherVT> {
   let ParserMatchClass = SImmAsmOperand<16, "lsl2">;
-  let EncoderMethod = "getImmOpValueAsr2";
+  let EncoderMethod = "getImmOpValueAsr<2>";
   let DecoderMethod = "decodeSImmOperand<16, 2>";
 }
 
@@ -293,9 +362,13 @@ def simm20_lu32id : SImm20Operand {
   let ParserMatchClass = SImmAsmOperand<20, "lu32id">;
 }
 
+def simm20_pcaddu18i : SImm20Operand {
+  let ParserMatchClass = SImmAsmOperand<20, "pcaddu18i">;
+}
+
 def simm21_lsl2 : Operand<OtherVT> {
   let ParserMatchClass = SImmAsmOperand<21, "lsl2">;
-  let EncoderMethod = "getImmOpValueAsr2";
+  let EncoderMethod = "getImmOpValueAsr<2>";
   let DecoderMethod = "decodeSImmOperand<21, 2>";
 }
 
@@ -310,7 +383,7 @@ def SImm26OperandB: AsmOperandClass {
 // A symbol or an imm used in B/PseudoBR.
 def simm26_b : Operand<OtherVT> {
   let ParserMatchClass = SImm26OperandB;
-  let EncoderMethod = "getImmOpValueAsr2";
+  let EncoderMethod = "getImmOpValueAsr<2>";
   let DecoderMethod = "decodeSImmOperand<26, 2>";
 }
 
@@ -325,7 +398,7 @@ def SImm26OperandBL: AsmOperandClass {
 // A symbol or an imm used in BL/PseudoCALL/PseudoTAIL.
def simm26_symbol : Operand { let ParserMatchClass = SImm26OperandBL; - let EncoderMethod = "getImmOpValueAsr2"; + let EncoderMethod = "getImmOpValueAsr<2>"; let DecoderMethod = "decodeSImmOperand<26, 2>"; } @@ -378,220 +451,232 @@ def fma_nsz : PatFrag<(ops node:$fj, node:$fk, node:$fa), include "LoongArchInstrFormats.td" include "LoongArchFloatInstrFormats.td" +include "LoongArchLSXInstrFormats.td" +include "LoongArchLASXInstrFormats.td" //===----------------------------------------------------------------------===// // Instruction Class Templates //===----------------------------------------------------------------------===// -class ALU_3R op, string opstr> - : Fmt3R; -class ALU_2R op, string opstr> - : Fmt2R; +let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in { +class ALU_3R op> + : Fmt3R; +class ALU_2R op> + : Fmt2R; -class ALU_3RI2 op, string opstr, Operand ImmOpnd> - : Fmt3RI2 op, Operand ImmOpnd> + : Fmt3RI2; -class ALU_3RI3 op, string opstr, Operand ImmOpnd> - : Fmt3RI3 op, Operand ImmOpnd> + : Fmt3RI3; -class ALU_2RI5 op, string opstr, Operand ImmOpnd> - : Fmt2RI5 op, Operand ImmOpnd> + : Fmt2RI5; -class ALU_2RI6 op, string opstr, Operand ImmOpnd> - : Fmt2RI6 op, Operand ImmOpnd> + : Fmt2RI6; -class ALU_2RI12 op, string opstr, Operand ImmOpnd> - : Fmt2RI12 op, Operand ImmOpnd> + : Fmt2RI12; -class ALU_2RI16 op, string opstr, Operand ImmOpnd> - : Fmt2RI16 op, Operand ImmOpnd> + : Fmt2RI16; -class ALU_1RI20 op, string opstr, Operand ImmOpnd> - : Fmt1RI20; +class ALU_1RI20 op, Operand ImmOpnd> + : Fmt1RI20; +} // hasSideEffects = 0, mayLoad = 0, mayStore = 0 -class MISC_I15 op, string opstr> - : FmtI15; +let hasSideEffects = 1, mayLoad = 0, mayStore = 0 in +class MISC_I15 op> + : FmtI15; -class RDTIME_2R op, string opstr> - : Fmt2R; +let hasSideEffects = 1, mayLoad = 0, mayStore = 0 in +class RDTIME_2R op> + : Fmt2R; -class BrCC_2RI16 op, string opstr> - : Fmt2RI16 op> + : Fmt2RI16 { let isBranch = 1; let isTerminator = 1; } -class BrCCZ_1RI21 op, string opstr> - : Fmt1RI21 op> + : Fmt1RI21 { let isBranch = 1; let isTerminator = 1; } -class Br_I26 op, string opstr> - : FmtI26 { +class Br_I26 op> + : FmtI26 { let isBranch = 1; let isTerminator = 1; + let isBarrier = 1; } +} // hasSideEffects = 0, mayLoad = 0, mayStore = 0 -let mayLoad = 1 in { -class LOAD_3R op, string opstr> - : Fmt3R; -class LOAD_2RI12 op, string opstr> - : Fmt2RI12 op> + : Fmt3R; +class LOAD_2RI12 op> + : Fmt2RI12; -class LOAD_2RI14 op, string opstr> - : Fmt2RI14 op> + : Fmt2RI14; -} // mayLoad = 1 +} // hasSideEffects = 0, mayLoad = 1, mayStore = 0 -let mayStore = 1 in { -class STORE_3R op, string opstr> - : Fmt3R op> + : Fmt3R; -class STORE_2RI12 op, string opstr> - : Fmt2RI12 op> + : Fmt2RI12; -class STORE_2RI14 op, string opstr> - : Fmt2RI14 op> + : Fmt2RI14; -} // mayStore = 1 +} // hasSideEffects = 0, mayLoad = 0, mayStore = 1 -let mayLoad = 1, mayStore = 1, Constraints = "@earlyclobber $rd" in -class AM_3R op, string opstr> - : Fmt3R op> + : Fmt3R; -let mayLoad = 1 in -class LLBase op, string opstr> - : Fmt2RI14 op> + : Fmt2RI14; -let mayStore = 1, Constraints = "$rd = $dst" in -class SCBase op, string opstr> +let hasSideEffects = 0, mayLoad = 0, mayStore = 1, Constraints = "$rd = $dst" in +class SCBase op> : Fmt2RI14; + "$rd, $rj, $imm14">; -class IOCSRRD op, string opstr> - : Fmt2R; +let hasSideEffects = 1 in +class IOCSRRD op> + : Fmt2R; -class IOCSRWR op, string opstr> - : Fmt2R; +let hasSideEffects = 1 in +class IOCSRWR op> + : Fmt2R; 
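Note on the template changes above: the instruction classes no longer take an explicit mnemonic string, so the mnemonic has to come from the record's def name (via the `.ret` fragments visible in the `LAInst` bases). That is also why the `AMSWAP_DB_*` family is renamed to `AMSWAP__DB_*` later in this patch. A minimal C++ sketch of the assumed convention — a single underscore becomes a dot, a doubled underscore a literal underscore, letters are lowercased; the helper name `deriveMnemonic` is illustrative, not from this patch:

// Sketch only -- not this patch's TableGen. Assumed convention:
// "AMSWAP__DB_W" -> "amswap_db.w", "XVADD_B" -> "xvadd.b".
#include <cassert>
#include <cctype>
#include <cstddef>
#include <string>

static std::string deriveMnemonic(const std::string &DefName) {
  std::string Mnemonic;
  for (std::size_t I = 0; I < DefName.size(); ++I) {
    if (DefName[I] == '_' && I + 1 < DefName.size() && DefName[I + 1] == '_') {
      Mnemonic += '_'; // "__" encodes a literal underscore
      ++I;             // skip the second underscore
    } else if (DefName[I] == '_') {
      Mnemonic += '.'; // "_" separates mnemonic suffixes
    } else {
      Mnemonic += char(std::tolower((unsigned char)DefName[I]));
    }
  }
  return Mnemonic;
}

int main() {
  assert(deriveMnemonic("AMSWAP__DB_W") == "amswap_db.w");
  assert(deriveMnemonic("XVADD_B") == "xvadd.b");
  assert(deriveMnemonic("PCADDU18I") == "pcaddu18i");
  return 0;
}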
//===----------------------------------------------------------------------===// // Basic Integer Instructions //===----------------------------------------------------------------------===// // Arithmetic Operation Instructions -def ADD_W : ALU_3R<0b00000000000100000, "add.w">; -def SUB_W : ALU_3R<0b00000000000100010, "sub.w">; -def ADDI_W : ALU_2RI12<0b0000001010, "addi.w", simm12_addlike>; -def ALSL_W : ALU_3RI2<0b000000000000010, "alsl.w", uimm2_plus1>; -def LU12I_W : ALU_1RI20<0b0001010, "lu12i.w", simm20_lu12iw>; -def SLT : ALU_3R<0b00000000000100100, "slt">; -def SLTU : ALU_3R<0b00000000000100101, "sltu">; -def SLTI : ALU_2RI12<0b0000001000, "slti", simm12>; -def SLTUI : ALU_2RI12<0b0000001001, "sltui", simm12>; -def PCADDI : ALU_1RI20<0b0001100, "pcaddi", simm20>; -def PCADDU12I : ALU_1RI20<0b0001110, "pcaddu12i", simm20>; -def PCALAU12I : ALU_1RI20<0b0001101, "pcalau12i", simm20_pcalau12i>; -def AND : ALU_3R<0b00000000000101001, "and">; -def OR : ALU_3R<0b00000000000101010, "or">; -def NOR : ALU_3R<0b00000000000101000, "nor">; -def XOR : ALU_3R<0b00000000000101011, "xor">; -def ANDN : ALU_3R<0b00000000000101101, "andn">; -def ORN : ALU_3R<0b00000000000101100, "orn">; -def ANDI : ALU_2RI12<0b0000001101, "andi", uimm12>; -def ORI : ALU_2RI12<0b0000001110, "ori", uimm12_ori>; -def XORI : ALU_2RI12<0b0000001111, "xori", uimm12>; -def MUL_W : ALU_3R<0b00000000000111000, "mul.w">; -def MULH_W : ALU_3R<0b00000000000111001, "mulh.w">; -def MULH_WU : ALU_3R<0b00000000000111010, "mulh.wu">; +def ADD_W : ALU_3R<0b00000000000100000>; +def SUB_W : ALU_3R<0b00000000000100010>; +def ADDI_W : ALU_2RI12<0b0000001010, simm12_addlike>; +def ALSL_W : ALU_3RI2<0b000000000000010, uimm2_plus1>; +def LU12I_W : ALU_1RI20<0b0001010, simm20_lu12iw>; +def SLT : ALU_3R<0b00000000000100100>; +def SLTU : ALU_3R<0b00000000000100101>; +def SLTI : ALU_2RI12<0b0000001000, simm12>; +def SLTUI : ALU_2RI12<0b0000001001, simm12>; +def PCADDI : ALU_1RI20<0b0001100, simm20>; +def PCADDU12I : ALU_1RI20<0b0001110, simm20>; +def PCALAU12I : ALU_1RI20<0b0001101, simm20_pcalau12i>; +def AND : ALU_3R<0b00000000000101001>; +def OR : ALU_3R<0b00000000000101010>; +def NOR : ALU_3R<0b00000000000101000>; +def XOR : ALU_3R<0b00000000000101011>; +def ANDN : ALU_3R<0b00000000000101101>; +def ORN : ALU_3R<0b00000000000101100>; +def ANDI : ALU_2RI12<0b0000001101, uimm12>; +def ORI : ALU_2RI12<0b0000001110, uimm12_ori>; +def XORI : ALU_2RI12<0b0000001111, uimm12>; +def MUL_W : ALU_3R<0b00000000000111000>; +def MULH_W : ALU_3R<0b00000000000111001>; +def MULH_WU : ALU_3R<0b00000000000111010>; let usesCustomInserter = true in { -def DIV_W : ALU_3R<0b00000000001000000, "div.w">; -def MOD_W : ALU_3R<0b00000000001000001, "mod.w">; -def DIV_WU : ALU_3R<0b00000000001000010, "div.wu">; -def MOD_WU : ALU_3R<0b00000000001000011, "mod.wu">; +def DIV_W : ALU_3R<0b00000000001000000>; +def MOD_W : ALU_3R<0b00000000001000001>; +def DIV_WU : ALU_3R<0b00000000001000010>; +def MOD_WU : ALU_3R<0b00000000001000011>; } // usesCustomInserter = true // Bit-shift Instructions -def SLL_W : ALU_3R<0b00000000000101110, "sll.w">; -def SRL_W : ALU_3R<0b00000000000101111, "srl.w">; -def SRA_W : ALU_3R<0b00000000000110000, "sra.w">; -def ROTR_W : ALU_3R<0b00000000000110110, "rotr.w">; +def SLL_W : ALU_3R<0b00000000000101110>; +def SRL_W : ALU_3R<0b00000000000101111>; +def SRA_W : ALU_3R<0b00000000000110000>; +def ROTR_W : ALU_3R<0b00000000000110110>; -def SLLI_W : ALU_2RI5<0b00000000010000001, "slli.w", uimm5>; -def SRLI_W : ALU_2RI5<0b00000000010001001, "srli.w", 
uimm5>; -def SRAI_W : ALU_2RI5<0b00000000010010001, "srai.w", uimm5>; -def ROTRI_W : ALU_2RI5<0b00000000010011001, "rotri.w", uimm5>; +def SLLI_W : ALU_2RI5<0b00000000010000001, uimm5>; +def SRLI_W : ALU_2RI5<0b00000000010001001, uimm5>; +def SRAI_W : ALU_2RI5<0b00000000010010001, uimm5>; +def ROTRI_W : ALU_2RI5<0b00000000010011001, uimm5>; // Bit-manipulation Instructions -def EXT_W_B : ALU_2R<0b0000000000000000010111, "ext.w.b">; -def EXT_W_H : ALU_2R<0b0000000000000000010110, "ext.w.h">; -def CLO_W : ALU_2R<0b0000000000000000000100, "clo.w">; -def CLZ_W : ALU_2R<0b0000000000000000000101, "clz.w">; -def CTO_W : ALU_2R<0b0000000000000000000110, "cto.w">; -def CTZ_W : ALU_2R<0b0000000000000000000111, "ctz.w">; -def BYTEPICK_W : ALU_3RI2<0b000000000000100, "bytepick.w", uimm2>; -def REVB_2H : ALU_2R<0b0000000000000000001100, "revb.2h">; -def BITREV_4B : ALU_2R<0b0000000000000000010010, "bitrev.4b">; -def BITREV_W : ALU_2R<0b0000000000000000010100, "bitrev.w">; +def EXT_W_B : ALU_2R<0b0000000000000000010111>; +def EXT_W_H : ALU_2R<0b0000000000000000010110>; +def CLO_W : ALU_2R<0b0000000000000000000100>; +def CLZ_W : ALU_2R<0b0000000000000000000101>; +def CTO_W : ALU_2R<0b0000000000000000000110>; +def CTZ_W : ALU_2R<0b0000000000000000000111>; +def BYTEPICK_W : ALU_3RI2<0b000000000000100, uimm2>; +def REVB_2H : ALU_2R<0b0000000000000000001100>; +def BITREV_4B : ALU_2R<0b0000000000000000010010>; +def BITREV_W : ALU_2R<0b0000000000000000010100>; let Constraints = "$rd = $dst" in { def BSTRINS_W : FmtBSTR_W<0b000000000110, (outs GPR:$dst), (ins GPR:$rd, GPR:$rj, uimm5:$msbw, uimm5:$lsbw), - "bstrins.w", "$rd, $rj, $msbw, $lsbw">; + "$rd, $rj, $msbw, $lsbw">; } def BSTRPICK_W : FmtBSTR_W<0b000000000111, (outs GPR:$rd), (ins GPR:$rj, uimm5:$msbw, uimm5:$lsbw), - "bstrpick.w", "$rd, $rj, $msbw, $lsbw">; -def MASKEQZ : ALU_3R<0b00000000000100110, "maskeqz">; -def MASKNEZ : ALU_3R<0b00000000000100111, "masknez">; + "$rd, $rj, $msbw, $lsbw">; +def MASKEQZ : ALU_3R<0b00000000000100110>; +def MASKNEZ : ALU_3R<0b00000000000100111>; // Branch Instructions -def BEQ : BrCC_2RI16<0b010110, "beq">; -def BNE : BrCC_2RI16<0b010111, "bne">; -def BLT : BrCC_2RI16<0b011000, "blt">; -def BGE : BrCC_2RI16<0b011001, "bge">; -def BLTU : BrCC_2RI16<0b011010, "bltu">; -def BGEU : BrCC_2RI16<0b011011, "bgeu">; -def BEQZ : BrCCZ_1RI21<0b010000, "beqz">; -def BNEZ : BrCCZ_1RI21<0b010001, "bnez">; -def B : Br_I26<0b010100, "b">; - -let isCall = 1, Defs=[R1] in -def BL : FmtI26<0b010101, (outs), (ins simm26_symbol:$imm26), "bl", "$imm26">; +def BEQ : BrCC_2RI16<0b010110>; +def BNE : BrCC_2RI16<0b010111>; +def BLT : BrCC_2RI16<0b011000>; +def BGE : BrCC_2RI16<0b011001>; +def BLTU : BrCC_2RI16<0b011010>; +def BGEU : BrCC_2RI16<0b011011>; +def BEQZ : BrCCZ_1RI21<0b010000>; +def BNEZ : BrCCZ_1RI21<0b010001>; +def B : Br_I26<0b010100>; + +let hasSideEffects = 0, mayLoad = 0, mayStore = 0, isCall = 1, Defs=[R1] in +def BL : FmtI26<0b010101, (outs), (ins simm26_symbol:$imm26), "$imm26">; +let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in def JIRL : Fmt2RI16<0b010011, (outs GPR:$rd), - (ins GPR:$rj, simm16_lsl2:$imm16), "jirl", - "$rd, $rj, $imm16">; + (ins GPR:$rj, simm16_lsl2:$imm16), "$rd, $rj, $imm16">; // Common Memory Access Instructions -def LD_B : LOAD_2RI12<0b0010100000, "ld.b">; -def LD_H : LOAD_2RI12<0b0010100001, "ld.h">; -def LD_W : LOAD_2RI12<0b0010100010, "ld.w">; -def LD_BU : LOAD_2RI12<0b0010101000, "ld.bu">; -def LD_HU : LOAD_2RI12<0b0010101001, "ld.hu">; -def ST_B : STORE_2RI12<0b0010100100, "st.b">; -def ST_H : 
STORE_2RI12<0b0010100101, "st.h">; -def ST_W : STORE_2RI12<0b0010100110, "st.w">; -def PRELD : FmtPRELD<(outs), (ins uimm5:$imm5, GPR:$rj, simm12:$imm12), "preld", +def LD_B : LOAD_2RI12<0b0010100000>; +def LD_H : LOAD_2RI12<0b0010100001>; +def LD_W : LOAD_2RI12<0b0010100010>; +def LD_BU : LOAD_2RI12<0b0010101000>; +def LD_HU : LOAD_2RI12<0b0010101001>; +def ST_B : STORE_2RI12<0b0010100100>; +def ST_H : STORE_2RI12<0b0010100101>; +def ST_W : STORE_2RI12<0b0010100110>; +let hasSideEffects = 0, mayLoad = 1, mayStore = 1 in +def PRELD : FmtPRELD<(outs), (ins uimm5:$imm5, GPR:$rj, simm12:$imm12), "$imm5, $rj, $imm12">; // Atomic Memory Access Instructions -def LL_W : LLBase<0b00100000, "ll.w">; -def SC_W : SCBase<0b00100001, "sc.w">; +def LL_W : LLBase<0b00100000>; +def SC_W : SCBase<0b00100001>; // Barrier Instructions -def DBAR : MISC_I15<0b00111000011100100, "dbar">; -def IBAR : MISC_I15<0b00111000011100101, "ibar">; +def DBAR : MISC_I15<0b00111000011100100>; +def IBAR : MISC_I15<0b00111000011100101>; // Other Miscellaneous Instructions -def SYSCALL : MISC_I15<0b00000000001010110, "syscall">; -def BREAK : MISC_I15<0b00000000001010100, "break">; -def RDTIMEL_W : RDTIME_2R<0b0000000000000000011000, "rdtimel.w">; -def RDTIMEH_W : RDTIME_2R<0b0000000000000000011001, "rdtimeh.w">; -def CPUCFG : ALU_2R<0b0000000000000000011011, "cpucfg">; +def SYSCALL : MISC_I15<0b00000000001010110>; +def BREAK : MISC_I15<0b00000000001010100>; +def RDTIMEL_W : RDTIME_2R<0b0000000000000000011000>; +def RDTIMEH_W : RDTIME_2R<0b0000000000000000011001>; +def CPUCFG : ALU_2R<0b0000000000000000011011>; // Cache Maintenance Instructions -def CACOP : FmtCACOP<(outs), (ins uimm5:$op, GPR:$rj, simm12:$imm12), "cacop", +def CACOP : FmtCACOP<(outs), (ins uimm5:$op, GPR:$rj, simm12:$imm12), "$op, $rj, $imm12">; /// LA64 instructions @@ -599,159 +684,161 @@ def CACOP : FmtCACOP<(outs), (ins uimm5:$op, GPR:$rj, simm12:$imm12), "cacop", let Predicates = [IsLA64] in { // Arithmetic Operation Instructions for 64-bits -def ADD_D : ALU_3R<0b00000000000100001, "add.d">; -def SUB_D : ALU_3R<0b00000000000100011, "sub.d">; -def ADDI_D : ALU_2RI12<0b0000001011, "addi.d", simm12_addlike>; -def ADDU16I_D : ALU_2RI16<0b000100, "addu16i.d", simm16>; -def ALSL_WU : ALU_3RI2<0b000000000000011, "alsl.wu", uimm2_plus1>; -def ALSL_D : ALU_3RI2<0b000000000010110, "alsl.d", uimm2_plus1>; +def ADD_D : ALU_3R<0b00000000000100001>; +def SUB_D : ALU_3R<0b00000000000100011>; +def ADDI_D : ALU_2RI12<0b0000001011, simm12_addlike>; +def ADDU16I_D : ALU_2RI16<0b000100, simm16>; +def ALSL_WU : ALU_3RI2<0b000000000000011, uimm2_plus1>; +def ALSL_D : ALU_3RI2<0b000000000010110, uimm2_plus1>; let Constraints = "$rd = $dst" in { +let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in def LU32I_D : Fmt1RI20<0b0001011, (outs GPR:$dst), - (ins GPR:$rd, simm20_lu32id:$imm20), "lu32i.d", + (ins GPR:$rd, simm20_lu32id:$imm20), "$rd, $imm20">; } -def LU52I_D : ALU_2RI12<0b0000001100, "lu52i.d", simm12_lu52id>; -def PCADDU18I : ALU_1RI20<0b0001111, "pcaddu18i", simm20>; -def MUL_D : ALU_3R<0b00000000000111011, "mul.d">; -def MULH_D : ALU_3R<0b00000000000111100, "mulh.d">; -def MULH_DU : ALU_3R<0b00000000000111101, "mulh.du">; -def MULW_D_W : ALU_3R<0b00000000000111110, "mulw.d.w">; -def MULW_D_WU : ALU_3R<0b00000000000111111, "mulw.d.wu">; +def LU52I_D : ALU_2RI12<0b0000001100, simm12_lu52id>; +def PCADDU18I : ALU_1RI20<0b0001111, simm20_pcaddu18i>; +def MUL_D : ALU_3R<0b00000000000111011>; +def MULH_D : ALU_3R<0b00000000000111100>; +def MULH_DU : 
ALU_3R<0b00000000000111101>; +def MULW_D_W : ALU_3R<0b00000000000111110>; +def MULW_D_WU : ALU_3R<0b00000000000111111>; let usesCustomInserter = true in { -def DIV_D : ALU_3R<0b00000000001000100, "div.d">; -def MOD_D : ALU_3R<0b00000000001000101, "mod.d">; -def DIV_DU : ALU_3R<0b00000000001000110, "div.du">; -def MOD_DU : ALU_3R<0b00000000001000111, "mod.du">; +def DIV_D : ALU_3R<0b00000000001000100>; +def MOD_D : ALU_3R<0b00000000001000101>; +def DIV_DU : ALU_3R<0b00000000001000110>; +def MOD_DU : ALU_3R<0b00000000001000111>; } // usesCustomInserter = true // Bit-shift Instructions for 64-bits -def SLL_D : ALU_3R<0b00000000000110001, "sll.d">; -def SRL_D : ALU_3R<0b00000000000110010, "srl.d">; -def SRA_D : ALU_3R<0b00000000000110011, "sra.d">; -def ROTR_D : ALU_3R<0b00000000000110111, "rotr.d">; -def SLLI_D : ALU_2RI6<0b0000000001000001, "slli.d", uimm6>; -def SRLI_D : ALU_2RI6<0b0000000001000101, "srli.d", uimm6>; -def SRAI_D : ALU_2RI6<0b0000000001001001, "srai.d", uimm6>; -def ROTRI_D : ALU_2RI6<0b0000000001001101, "rotri.d", uimm6>; +def SLL_D : ALU_3R<0b00000000000110001>; +def SRL_D : ALU_3R<0b00000000000110010>; +def SRA_D : ALU_3R<0b00000000000110011>; +def ROTR_D : ALU_3R<0b00000000000110111>; +def SLLI_D : ALU_2RI6<0b0000000001000001, uimm6>; +def SRLI_D : ALU_2RI6<0b0000000001000101, uimm6>; +def SRAI_D : ALU_2RI6<0b0000000001001001, uimm6>; +def ROTRI_D : ALU_2RI6<0b0000000001001101, uimm6>; // Bit-manipulation Instructions for 64-bits -def CLO_D : ALU_2R<0b0000000000000000001000, "clo.d">; -def CLZ_D : ALU_2R<0b0000000000000000001001, "clz.d">; -def CTO_D : ALU_2R<0b0000000000000000001010, "cto.d">; -def CTZ_D : ALU_2R<0b0000000000000000001011, "ctz.d">; -def BYTEPICK_D : ALU_3RI3<0b00000000000011, "bytepick.d", uimm3>; -def REVB_4H : ALU_2R<0b0000000000000000001101, "revb.4h">; -def REVB_2W : ALU_2R<0b0000000000000000001110, "revb.2w">; -def REVB_D : ALU_2R<0b0000000000000000001111, "revb.d">; -def REVH_2W : ALU_2R<0b0000000000000000010000, "revh.2w">; -def REVH_D : ALU_2R<0b0000000000000000010001, "revh.d">; -def BITREV_8B : ALU_2R<0b0000000000000000010011, "bitrev.8b">; -def BITREV_D : ALU_2R<0b0000000000000000010101, "bitrev.d">; +def CLO_D : ALU_2R<0b0000000000000000001000>; +def CLZ_D : ALU_2R<0b0000000000000000001001>; +def CTO_D : ALU_2R<0b0000000000000000001010>; +def CTZ_D : ALU_2R<0b0000000000000000001011>; +def BYTEPICK_D : ALU_3RI3<0b00000000000011, uimm3>; +def REVB_4H : ALU_2R<0b0000000000000000001101>; +def REVB_2W : ALU_2R<0b0000000000000000001110>; +def REVB_D : ALU_2R<0b0000000000000000001111>; +def REVH_2W : ALU_2R<0b0000000000000000010000>; +def REVH_D : ALU_2R<0b0000000000000000010001>; +def BITREV_8B : ALU_2R<0b0000000000000000010011>; +def BITREV_D : ALU_2R<0b0000000000000000010101>; let Constraints = "$rd = $dst" in { def BSTRINS_D : FmtBSTR_D<0b0000000010, (outs GPR:$dst), (ins GPR:$rd, GPR:$rj, uimm6:$msbd, uimm6:$lsbd), - "bstrins.d", "$rd, $rj, $msbd, $lsbd">; + "$rd, $rj, $msbd, $lsbd">; } def BSTRPICK_D : FmtBSTR_D<0b0000000011, (outs GPR:$rd), (ins GPR:$rj, uimm6:$msbd, uimm6:$lsbd), - "bstrpick.d", "$rd, $rj, $msbd, $lsbd">; + "$rd, $rj, $msbd, $lsbd">; // Common Memory Access Instructions for 64-bits -def LD_WU : LOAD_2RI12<0b0010101010, "ld.wu">; -def LD_D : LOAD_2RI12<0b0010100011, "ld.d">; -def ST_D : STORE_2RI12<0b0010100111, "st.d">; -def LDX_B : LOAD_3R<0b00111000000000000, "ldx.b">; -def LDX_H : LOAD_3R<0b00111000000001000, "ldx.h">; -def LDX_W : LOAD_3R<0b00111000000010000, "ldx.w">; -def LDX_D : LOAD_3R<0b00111000000011000, "ldx.d">; 
-def LDX_BU : LOAD_3R<0b00111000001000000, "ldx.bu">; -def LDX_HU : LOAD_3R<0b00111000001001000, "ldx.hu">; -def LDX_WU : LOAD_3R<0b00111000001010000, "ldx.wu">; -def STX_B : STORE_3R<0b00111000000100000, "stx.b">; -def STX_H : STORE_3R<0b00111000000101000, "stx.h">; -def STX_W : STORE_3R<0b00111000000110000, "stx.w">; -def STX_D : STORE_3R<0b00111000000111000, "stx.d">; -def LDPTR_W : LOAD_2RI14<0b00100100, "ldptr.w">; -def LDPTR_D : LOAD_2RI14<0b00100110, "ldptr.d">; -def STPTR_W : STORE_2RI14<0b00100101, "stptr.w">; -def STPTR_D : STORE_2RI14<0b00100111, "stptr.d">; -def PRELDX : FmtPRELDX<(outs), (ins uimm5:$imm5, GPR:$rj, GPR:$rk), "preldx", +def LD_WU : LOAD_2RI12<0b0010101010>; +def LD_D : LOAD_2RI12<0b0010100011>; +def ST_D : STORE_2RI12<0b0010100111>; +def LDX_B : LOAD_3R<0b00111000000000000>; +def LDX_H : LOAD_3R<0b00111000000001000>; +def LDX_W : LOAD_3R<0b00111000000010000>; +def LDX_D : LOAD_3R<0b00111000000011000>; +def LDX_BU : LOAD_3R<0b00111000001000000>; +def LDX_HU : LOAD_3R<0b00111000001001000>; +def LDX_WU : LOAD_3R<0b00111000001010000>; +def STX_B : STORE_3R<0b00111000000100000>; +def STX_H : STORE_3R<0b00111000000101000>; +def STX_W : STORE_3R<0b00111000000110000>; +def STX_D : STORE_3R<0b00111000000111000>; +def LDPTR_W : LOAD_2RI14<0b00100100>; +def LDPTR_D : LOAD_2RI14<0b00100110>; +def STPTR_W : STORE_2RI14<0b00100101>; +def STPTR_D : STORE_2RI14<0b00100111>; +let hasSideEffects = 0, mayLoad = 1, mayStore = 1 in +def PRELDX : FmtPRELDX<(outs), (ins uimm5:$imm5, GPR:$rj, GPR:$rk), "$imm5, $rj, $rk">; // Bound Check Memory Access Instructions -def LDGT_B : LOAD_3R<0b00111000011110000, "ldgt.b">; -def LDGT_H : LOAD_3R<0b00111000011110001, "ldgt.h">; -def LDGT_W : LOAD_3R<0b00111000011110010, "ldgt.w">; -def LDGT_D : LOAD_3R<0b00111000011110011, "ldgt.d">; -def LDLE_B : LOAD_3R<0b00111000011110100, "ldle.b">; -def LDLE_H : LOAD_3R<0b00111000011110101, "ldle.h">; -def LDLE_W : LOAD_3R<0b00111000011110110, "ldle.w">; -def LDLE_D : LOAD_3R<0b00111000011110111, "ldle.d">; -def STGT_B : STORE_3R<0b00111000011111000, "stgt.b">; -def STGT_H : STORE_3R<0b00111000011111001, "stgt.h">; -def STGT_W : STORE_3R<0b00111000011111010, "stgt.w">; -def STGT_D : STORE_3R<0b00111000011111011, "stgt.d">; -def STLE_B : STORE_3R<0b00111000011111100, "stle.b">; -def STLE_H : STORE_3R<0b00111000011111101, "stle.h">; -def STLE_W : STORE_3R<0b00111000011111110, "stle.w">; -def STLE_D : STORE_3R<0b00111000011111111, "stle.d">; +def LDGT_B : LOAD_3R<0b00111000011110000>; +def LDGT_H : LOAD_3R<0b00111000011110001>; +def LDGT_W : LOAD_3R<0b00111000011110010>; +def LDGT_D : LOAD_3R<0b00111000011110011>; +def LDLE_B : LOAD_3R<0b00111000011110100>; +def LDLE_H : LOAD_3R<0b00111000011110101>; +def LDLE_W : LOAD_3R<0b00111000011110110>; +def LDLE_D : LOAD_3R<0b00111000011110111>; +def STGT_B : STORE_3R<0b00111000011111000>; +def STGT_H : STORE_3R<0b00111000011111001>; +def STGT_W : STORE_3R<0b00111000011111010>; +def STGT_D : STORE_3R<0b00111000011111011>; +def STLE_B : STORE_3R<0b00111000011111100>; +def STLE_H : STORE_3R<0b00111000011111101>; +def STLE_W : STORE_3R<0b00111000011111110>; +def STLE_D : STORE_3R<0b00111000011111111>; // Atomic Memory Access Instructions for 64-bits -def AMSWAP_W : AM_3R<0b00111000011000000, "amswap.w">; -def AMSWAP_D : AM_3R<0b00111000011000001, "amswap.d">; -def AMADD_W : AM_3R<0b00111000011000010, "amadd.w">; -def AMADD_D : AM_3R<0b00111000011000011, "amadd.d">; -def AMAND_W : AM_3R<0b00111000011000100, "amand.w">; -def AMAND_D : AM_3R<0b00111000011000101, "amand.d">; 
-def AMOR_W : AM_3R<0b00111000011000110, "amor.w">; -def AMOR_D : AM_3R<0b00111000011000111, "amor.d">; -def AMXOR_W : AM_3R<0b00111000011001000, "amxor.w">; -def AMXOR_D : AM_3R<0b00111000011001001, "amxor.d">; -def AMMAX_W : AM_3R<0b00111000011001010, "ammax.w">; -def AMMAX_D : AM_3R<0b00111000011001011, "ammax.d">; -def AMMIN_W : AM_3R<0b00111000011001100, "ammin.w">; -def AMMIN_D : AM_3R<0b00111000011001101, "ammin.d">; -def AMMAX_WU : AM_3R<0b00111000011001110, "ammax.wu">; -def AMMAX_DU : AM_3R<0b00111000011001111, "ammax.du">; -def AMMIN_WU : AM_3R<0b00111000011010000, "ammin.wu">; -def AMMIN_DU : AM_3R<0b00111000011010001, "ammin.du">; -def AMSWAP_DB_W : AM_3R<0b00111000011010010, "amswap_db.w">; -def AMSWAP_DB_D : AM_3R<0b00111000011010011, "amswap_db.d">; -def AMADD_DB_W : AM_3R<0b00111000011010100, "amadd_db.w">; -def AMADD_DB_D : AM_3R<0b00111000011010101, "amadd_db.d">; -def AMAND_DB_W : AM_3R<0b00111000011010110, "amand_db.w">; -def AMAND_DB_D : AM_3R<0b00111000011010111, "amand_db.d">; -def AMOR_DB_W : AM_3R<0b00111000011011000, "amor_db.w">; -def AMOR_DB_D : AM_3R<0b00111000011011001, "amor_db.d">; -def AMXOR_DB_W : AM_3R<0b00111000011011010, "amxor_db.w">; -def AMXOR_DB_D : AM_3R<0b00111000011011011, "amxor_db.d">; -def AMMAX_DB_W : AM_3R<0b00111000011011100, "ammax_db.w">; -def AMMAX_DB_D : AM_3R<0b00111000011011101, "ammax_db.d">; -def AMMIN_DB_W : AM_3R<0b00111000011011110, "ammin_db.w">; -def AMMIN_DB_D : AM_3R<0b00111000011011111, "ammin_db.d">; -def AMMAX_DB_WU : AM_3R<0b00111000011100000, "ammax_db.wu">; -def AMMAX_DB_DU : AM_3R<0b00111000011100001, "ammax_db.du">; -def AMMIN_DB_WU : AM_3R<0b00111000011100010, "ammin_db.wu">; -def AMMIN_DB_DU : AM_3R<0b00111000011100011, "ammin_db.du">; -def LL_D : LLBase<0b00100010, "ll.d">; -def SC_D : SCBase<0b00100011, "sc.d">; +def AMSWAP_W : AM_3R<0b00111000011000000>; +def AMSWAP_D : AM_3R<0b00111000011000001>; +def AMADD_W : AM_3R<0b00111000011000010>; +def AMADD_D : AM_3R<0b00111000011000011>; +def AMAND_W : AM_3R<0b00111000011000100>; +def AMAND_D : AM_3R<0b00111000011000101>; +def AMOR_W : AM_3R<0b00111000011000110>; +def AMOR_D : AM_3R<0b00111000011000111>; +def AMXOR_W : AM_3R<0b00111000011001000>; +def AMXOR_D : AM_3R<0b00111000011001001>; +def AMMAX_W : AM_3R<0b00111000011001010>; +def AMMAX_D : AM_3R<0b00111000011001011>; +def AMMIN_W : AM_3R<0b00111000011001100>; +def AMMIN_D : AM_3R<0b00111000011001101>; +def AMMAX_WU : AM_3R<0b00111000011001110>; +def AMMAX_DU : AM_3R<0b00111000011001111>; +def AMMIN_WU : AM_3R<0b00111000011010000>; +def AMMIN_DU : AM_3R<0b00111000011010001>; +def AMSWAP__DB_W : AM_3R<0b00111000011010010>; +def AMSWAP__DB_D : AM_3R<0b00111000011010011>; +def AMADD__DB_W : AM_3R<0b00111000011010100>; +def AMADD__DB_D : AM_3R<0b00111000011010101>; +def AMAND__DB_W : AM_3R<0b00111000011010110>; +def AMAND__DB_D : AM_3R<0b00111000011010111>; +def AMOR__DB_W : AM_3R<0b00111000011011000>; +def AMOR__DB_D : AM_3R<0b00111000011011001>; +def AMXOR__DB_W : AM_3R<0b00111000011011010>; +def AMXOR__DB_D : AM_3R<0b00111000011011011>; +def AMMAX__DB_W : AM_3R<0b00111000011011100>; +def AMMAX__DB_D : AM_3R<0b00111000011011101>; +def AMMIN__DB_W : AM_3R<0b00111000011011110>; +def AMMIN__DB_D : AM_3R<0b00111000011011111>; +def AMMAX__DB_WU : AM_3R<0b00111000011100000>; +def AMMAX__DB_DU : AM_3R<0b00111000011100001>; +def AMMIN__DB_WU : AM_3R<0b00111000011100010>; +def AMMIN__DB_DU : AM_3R<0b00111000011100011>; +def LL_D : LLBase<0b00100010>; +def SC_D : SCBase<0b00100011>; // CRC Check Instructions -def CRC_W_B_W : 
ALU_3R<0b00000000001001000, "crc.w.b.w">; -def CRC_W_H_W : ALU_3R<0b00000000001001001, "crc.w.h.w">; -def CRC_W_W_W : ALU_3R<0b00000000001001010, "crc.w.w.w">; -def CRC_W_D_W : ALU_3R<0b00000000001001011, "crc.w.d.w">; -def CRCC_W_B_W : ALU_3R<0b00000000001001100, "crcc.w.b.w">; -def CRCC_W_H_W : ALU_3R<0b00000000001001101, "crcc.w.h.w">; -def CRCC_W_W_W : ALU_3R<0b00000000001001110, "crcc.w.w.w">; -def CRCC_W_D_W : ALU_3R<0b00000000001001111, "crcc.w.d.w">; +def CRC_W_B_W : ALU_3R<0b00000000001001000>; +def CRC_W_H_W : ALU_3R<0b00000000001001001>; +def CRC_W_W_W : ALU_3R<0b00000000001001010>; +def CRC_W_D_W : ALU_3R<0b00000000001001011>; +def CRCC_W_B_W : ALU_3R<0b00000000001001100>; +def CRCC_W_H_W : ALU_3R<0b00000000001001101>; +def CRCC_W_W_W : ALU_3R<0b00000000001001110>; +def CRCC_W_D_W : ALU_3R<0b00000000001001111>; // Other Miscellaneous Instructions for 64-bits def ASRTLE_D : FmtASRT<0b00000000000000010, (outs), (ins GPR:$rj, GPR:$rk), - "asrtle.d", "$rj, $rk">; + "$rj, $rk">; def ASRTGT_D : FmtASRT<0b00000000000000011, (outs), (ins GPR:$rj, GPR:$rk), - "asrtgt.d", "$rj, $rk">; -def RDTIME_D : RDTIME_2R<0b0000000000000000011010, "rdtime.d">; + "$rj, $rk">; +def RDTIME_D : RDTIME_2R<0b0000000000000000011010>; } // Predicates = [IsLA64] //===----------------------------------------------------------------------===// @@ -1040,18 +1127,45 @@ def : Pat<(brind GPR:$rj), (PseudoBRIND GPR:$rj, 0)>; def : Pat<(brind (add GPR:$rj, simm16_lsl2:$imm16)), (PseudoBRIND GPR:$rj, simm16_lsl2:$imm16)>; +// Function call with 'Small' code model. let isCall = 1, Defs = [R1] in -def PseudoCALL : Pseudo<(outs), (ins simm26_symbol:$func)>; +def PseudoCALL : Pseudo<(outs), (ins bare_symbol:$func)>; def : Pat<(loongarch_call tglobaladdr:$func), (PseudoCALL tglobaladdr:$func)>; def : Pat<(loongarch_call texternalsym:$func), (PseudoCALL texternalsym:$func)>; +// Function call with 'Medium' code model. +let isCall = 1, Defs = [R1, R20], Size = 8 in +def PseudoCALL_MEDIUM : Pseudo<(outs), (ins bare_symbol:$func)>; + +let Predicates = [IsLA64] in { +def : Pat<(loongarch_call_medium tglobaladdr:$func), + (PseudoCALL_MEDIUM tglobaladdr:$func)>; +def : Pat<(loongarch_call_medium texternalsym:$func), + (PseudoCALL_MEDIUM texternalsym:$func)>; +} // Predicates = [IsLA64] + +// Function call with 'Large' code model. +let isCall = 1, Defs = [R1, R20], Size = 24 in +def PseudoCALL_LARGE: Pseudo<(outs), (ins bare_symbol:$func)>; + +let Predicates = [IsLA64] in { +def : Pat<(loongarch_call_large tglobaladdr:$func), + (PseudoCALL_LARGE tglobaladdr:$func)>; +def : Pat<(loongarch_call_large texternalsym:$func), + (PseudoCALL_LARGE texternalsym:$func)>; +} // Predicates = [IsLA64] + let isCall = 1, Defs = [R1] in def PseudoCALLIndirect : Pseudo<(outs), (ins GPR:$rj), [(loongarch_call GPR:$rj)]>, PseudoInstExpansion<(JIRL R1, GPR:$rj, 0)>; +let Predicates = [IsLA64] in { +def : Pat<(loongarch_call_medium GPR:$rj), (PseudoCALLIndirect GPR:$rj)>; +def : Pat<(loongarch_call_large GPR:$rj), (PseudoCALLIndirect GPR:$rj)>; +} -let isCall = 1, Defs = [R1] in +let isCall = 1, hasSideEffects = 0, mayStore = 0, mayLoad = 0, Defs = [R1] in def PseudoJIRL_CALL : Pseudo<(outs), (ins GPR:$rj, simm16_lsl2:$imm16)>, PseudoInstExpansion<(JIRL R1, GPR:$rj, simm16_lsl2:$imm16)>; @@ -1060,28 +1174,72 @@ let isBarrier = 1, isReturn = 1, isTerminator = 1 in def PseudoRET : Pseudo<(outs), (ins), [(loongarch_ret)]>, PseudoInstExpansion<(JIRL R0, R1, 0)>; +// Tail call with 'Small' code model. 
 let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [R3] in
-def PseudoTAIL : Pseudo<(outs), (ins simm26_symbol:$dst)>;
+def PseudoTAIL : Pseudo<(outs), (ins bare_symbol:$dst)>;
 
 def : Pat<(loongarch_tail (iPTR tglobaladdr:$dst)),
           (PseudoTAIL tglobaladdr:$dst)>;
 def : Pat<(loongarch_tail (iPTR texternalsym:$dst)),
           (PseudoTAIL texternalsym:$dst)>;
 
+// Tail call with 'Medium' code model.
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
+    Uses = [R3], Defs = [R20], Size = 8 in
+def PseudoTAIL_MEDIUM : Pseudo<(outs), (ins bare_symbol:$dst)>;
+
+let Predicates = [IsLA64] in {
+def : Pat<(loongarch_tail_medium (iPTR tglobaladdr:$dst)),
+          (PseudoTAIL_MEDIUM tglobaladdr:$dst)>;
+def : Pat<(loongarch_tail_medium (iPTR texternalsym:$dst)),
+          (PseudoTAIL_MEDIUM texternalsym:$dst)>;
+} // Predicates = [IsLA64]
+
+// Tail call with 'Large' code model.
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
+    Uses = [R3], Defs = [R19, R20], Size = 24 in
+def PseudoTAIL_LARGE : Pseudo<(outs), (ins bare_symbol:$dst)>;
+
+let Predicates = [IsLA64] in {
+def : Pat<(loongarch_tail_large (iPTR tglobaladdr:$dst)),
+          (PseudoTAIL_LARGE tglobaladdr:$dst)>;
+def : Pat<(loongarch_tail_large (iPTR texternalsym:$dst)),
+          (PseudoTAIL_LARGE texternalsym:$dst)>;
+} // Predicates = [IsLA64]
+
 let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [R3] in
 def PseudoTAILIndirect : Pseudo<(outs), (ins GPRT:$rj),
                                 [(loongarch_tail GPRT:$rj)]>,
                          PseudoInstExpansion<(JIRL R0, GPR:$rj, 0)>;
+let Predicates = [IsLA64] in {
+def : Pat<(loongarch_tail_medium GPR:$rj), (PseudoTAILIndirect GPR:$rj)>;
+def : Pat<(loongarch_tail_large GPR:$rj), (PseudoTAILIndirect GPR:$rj)>;
+}
 
-let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [R3] in
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
+    hasSideEffects = 0, mayStore = 0, mayLoad = 0, Uses = [R3] in
 def PseudoB_TAIL : Pseudo<(outs), (ins simm26_b:$imm26)>,
                    PseudoInstExpansion<(B simm26_b:$imm26)>;
 
-let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [R3] in
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
+    hasSideEffects = 0, mayStore = 0, mayLoad = 0, Uses = [R3] in
 def PseudoJIRL_TAIL : Pseudo<(outs), (ins GPR:$rj, simm16_lsl2:$imm16)>,
                       PseudoInstExpansion<(JIRL R0, GPR:$rj, simm16_lsl2:$imm16)>;
 
+/// call36/tail36 macro instructions
+let isCall = 1, isBarrier = 1, isCodeGenOnly = 0, isAsmParserOnly = 1,
+    Defs = [R1], Size = 8, hasSideEffects = 0, mayStore = 0, mayLoad = 0 in
+def PseudoCALL36 : Pseudo<(outs), (ins bare_symbol:$dst), [],
+                          "call36", "$dst">,
+                   Requires<[IsLA64]>;
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [R3],
+    isCodeGenOnly = 0, isAsmParserOnly = 1, Size = 8, hasSideEffects = 0,
+    mayStore = 0, mayLoad = 0 in
+def PseudoTAIL36 : Pseudo<(outs), (ins GPR:$tmp, bare_symbol:$dst), [],
+                          "tail36", "$tmp, $dst">,
+                   Requires<[IsLA64]>;
+
 /// Load address (la*) macro instructions.
 // Define isCodeGenOnly = 0 to expose them to tablegened assembly parser.
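Both `PseudoCALL36` and `PseudoTAIL36` above are pinned at `Size = 8` because each macro presumably expands to a `pcaddu18i` + `jirl` pair: 20 freely encoded high bits plus a 16-bit, 4-byte-scaled low immediate, i.e. 36 encoded bits of a 38-bit PC-relative offset (hence the "36" in the names). A hedged C++ sketch of how such an offset splits into the two immediates — the helper name and its rounding are illustrative, not this patch's code:

// Sketch only: split a PC-relative offset for a pcaddu18i + jirl pair.
// pcaddu18i adds (Hi20 << 18) to the PC; jirl then adds Lo18, which it
// encodes as a signed 16-bit immediate scaled by 4.
#include <cassert>
#include <cstdint>
#include <utility>

static std::pair<int64_t, int64_t> splitCall36Offset(int64_t Offset) {
  assert((Offset & 3) == 0 && "call targets are 4-byte aligned");
  // Bias before the arithmetic shift so Lo18 lands in [-2^17, 2^17).
  int64_t Hi20 = (Offset + 0x20000) >> 18;
  int64_t Lo18 = Offset - (Hi20 << 18);
  assert(Lo18 >= -0x20000 && Lo18 < 0x20000 && "low part must fit jirl");
  return {Hi20, Lo18};
}

int main() {
  int64_t Offset = 0x12345678; // low two bits already clear
  auto [Hi20, Lo18] = splitCall36Offset(Offset);
  assert((Hi20 << 18) + Lo18 == Offset); // the pair round-trips exactly
  return 0;
}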
@@ -1094,6 +1252,7 @@ def PseudoLA_ABS_LARGE : Pseudo<(outs GPR:$dst), "la.abs", "$dst, $src">; def PseudoLA_PCREL : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [], "la.pcrel", "$dst, $src">; +let Defs = [R20], Size = 20 in def PseudoLA_PCREL_LARGE : Pseudo<(outs GPR:$dst), (ins GPR:$tmp, bare_symbol:$src), [], "la.pcrel", "$dst, $tmp, $src">, @@ -1105,28 +1264,30 @@ let hasSideEffects = 0, mayLoad = 1, mayStore = 0, isCodeGenOnly = 0, isAsmParserOnly = 1 in { def PseudoLA_GOT : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [], "la.got", "$dst, $src">; +def PseudoLA_TLS_IE : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [], + "la.tls.ie", "$dst, $src">; +def PseudoLA_TLS_LD : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [], + "la.tls.ld", "$dst, $src">; +def PseudoLA_TLS_GD : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [], + "la.tls.gd", "$dst, $src">; +let Defs = [R20], Size = 20 in { def PseudoLA_GOT_LARGE : Pseudo<(outs GPR:$dst), (ins GPR:$tmp, bare_symbol:$src), [], "la.got", "$dst, $tmp, $src">, Requires<[IsLA64]>; -def PseudoLA_TLS_IE : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [], - "la.tls.ie", "$dst, $src">; def PseudoLA_TLS_IE_LARGE : Pseudo<(outs GPR:$dst), (ins GPR:$tmp, bare_symbol:$src), [], "la.tls.ie", "$dst, $tmp, $src">, Requires<[IsLA64]>; -def PseudoLA_TLS_LD : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [], - "la.tls.ld", "$dst, $src">; def PseudoLA_TLS_LD_LARGE : Pseudo<(outs GPR:$dst), (ins GPR:$tmp, bare_symbol:$src), [], "la.tls.ld", "$dst, $tmp, $src">, Requires<[IsLA64]>; -def PseudoLA_TLS_GD : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [], - "la.tls.gd", "$dst, $src">; def PseudoLA_TLS_GD_LARGE : Pseudo<(outs GPR:$dst), (ins GPR:$tmp, bare_symbol:$src), [], "la.tls.gd", "$dst, $tmp, $src">, Requires<[IsLA64]>; +} // Defs = [R20], Size = 20 } // Load address inst alias: "la", "la.global" and "la.local". 
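The `Defs = [R20], Size = 20` added to `PseudoLA_PCREL_LARGE` and the `la.got`/`la.tls.*` LARGE variants above is consistent with a five-instruction large-code-model expansion, presumably using `$r20` as the scratch register; pinning the size keeps branch relaxation honest about the pseudo's true extent. A small C++ check of that arithmetic, with the assumed expansion (relocation operator spellings included for orientation, not taken from this patch) in the comment:

// Sketch only: the large-code-model la.pcrel expansion assumed above,
// five fixed-width instructions of 4 bytes each:
//
//   pcalau12i $dst, %pc_hi20(sym)        // PC-relative page base
//   addi.d    $tmp, $zero, %pc_lo12(sym) // low 12 bits of the offset
//   lu32i.d   $tmp, %pc64_lo20(sym)      // bits [51:32]
//   lu52i.d   $tmp, $tmp, %pc64_hi12(sym)// bits [63:52]
//   add.d     $dst, $dst, $tmp
#include <cassert>

int main() {
  const unsigned ExpandedInsts = 5; // the five instructions listed above
  const unsigned BytesPerInst = 4;  // fixed 32-bit LoongArch encoding
  assert(ExpandedInsts * BytesPerInst == 20 && "matches the Size field");
  return 0;
}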
@@ -1343,7 +1504,7 @@ defm : AtomicStPat, def PseudoAtomicStoreW : Pseudo<(outs GPR:$dst), (ins GPR:$rj, GPR:$rk)>, - PseudoInstExpansion<(AMSWAP_DB_W R0, GPR:$rk, GPRMemAtomic:$rj)>; + PseudoInstExpansion<(AMSWAP__DB_W R0, GPR:$rk, GPRMemAtomic:$rj)>; def : Pat<(atomic_store_release_seqcst_32 GPR:$rj, GPR:$rk), (PseudoAtomicStoreW GPR:$rj, GPR:$rk)>; @@ -1351,7 +1512,7 @@ def : Pat<(atomic_store_release_seqcst_32 GPR:$rj, GPR:$rk), let Predicates = [IsLA64] in { def PseudoAtomicStoreD : Pseudo<(outs GPR:$dst), (ins GPR:$rj, GPR:$rk)>, - PseudoInstExpansion<(AMSWAP_DB_D R0, GPR:$rk, GPRMemAtomic:$rj)>; + PseudoInstExpansion<(AMSWAP__DB_D R0, GPR:$rk, GPRMemAtomic:$rj)>; def : Pat<(atomic_store_release_seqcst_64 GPR:$rj, GPR:$rk), (PseudoAtomicStoreD GPR:$rj, GPR:$rk)>; @@ -1478,54 +1639,54 @@ let Predicates = [IsLA64] in { def : AtomicPat; def : Pat<(atomic_swap_32 GPR:$addr, GPR:$incr), - (AMSWAP_DB_W GPR:$incr, GPR:$addr)>; + (AMSWAP__DB_W GPR:$incr, GPR:$addr)>; def : Pat<(atomic_swap_64 GPR:$addr, GPR:$incr), - (AMSWAP_DB_D GPR:$incr, GPR:$addr)>; + (AMSWAP__DB_D GPR:$incr, GPR:$addr)>; def : Pat<(atomic_load_add_64 GPR:$rj, GPR:$rk), - (AMADD_DB_D GPR:$rk, GPR:$rj)>; + (AMADD__DB_D GPR:$rk, GPR:$rj)>; def : AtomicPat; def : Pat<(atomic_load_sub_32 GPR:$rj, GPR:$rk), - (AMADD_DB_W (SUB_W R0, GPR:$rk), GPR:$rj)>; + (AMADD__DB_W (SUB_W R0, GPR:$rk), GPR:$rj)>; def : Pat<(atomic_load_sub_64 GPR:$rj, GPR:$rk), - (AMADD_DB_D (SUB_D R0, GPR:$rk), GPR:$rj)>; + (AMADD__DB_D (SUB_D R0, GPR:$rk), GPR:$rj)>; def : AtomicPat; defm : PseudoBinPat<"atomic_load_nand_64", PseudoAtomicLoadNand64>; def : AtomicPat; def : Pat<(atomic_load_add_32 GPR:$rj, GPR:$rk), - (AMADD_DB_W GPR:$rk, GPR:$rj)>; + (AMADD__DB_W GPR:$rk, GPR:$rj)>; def : Pat<(atomic_load_and_32 GPR:$rj, GPR:$rk), - (AMAND_DB_W GPR:$rk, GPR:$rj)>; + (AMAND__DB_W GPR:$rk, GPR:$rj)>; def : Pat<(atomic_load_and_64 GPR:$rj, GPR:$rk), - (AMAND_DB_D GPR:$rk, GPR:$rj)>; + (AMAND__DB_D GPR:$rk, GPR:$rj)>; def : Pat<(atomic_load_or_32 GPR:$rj, GPR:$rk), - (AMOR_DB_W GPR:$rk, GPR:$rj)>; + (AMOR__DB_W GPR:$rk, GPR:$rj)>; def : Pat<(atomic_load_or_64 GPR:$rj, GPR:$rk), - (AMOR_DB_D GPR:$rk, GPR:$rj)>; + (AMOR__DB_D GPR:$rk, GPR:$rj)>; def : Pat<(atomic_load_xor_32 GPR:$rj, GPR:$rk), - (AMXOR_DB_W GPR:$rk, GPR:$rj)>; + (AMXOR__DB_W GPR:$rk, GPR:$rj)>; def : Pat<(atomic_load_xor_64 GPR:$rj, GPR:$rk), - (AMXOR_DB_D GPR:$rk, GPR:$rj)>; + (AMXOR__DB_D GPR:$rk, GPR:$rj)>; def : Pat<(atomic_load_umin_32 GPR:$rj, GPR:$rk), - (AMMIN_DB_WU GPR:$rk, GPR:$rj)>; + (AMMIN__DB_WU GPR:$rk, GPR:$rj)>; def : Pat<(atomic_load_umin_64 GPR:$rj, GPR:$rk), - (AMMIN_DB_DU GPR:$rk, GPR:$rj)>; + (AMMIN__DB_DU GPR:$rk, GPR:$rj)>; def : Pat<(atomic_load_umax_32 GPR:$rj, GPR:$rk), - (AMMAX_DB_WU GPR:$rk, GPR:$rj)>; + (AMMAX__DB_WU GPR:$rk, GPR:$rj)>; def : Pat<(atomic_load_umax_64 GPR:$rj, GPR:$rk), - (AMMAX_DB_DU GPR:$rk, GPR:$rj)>; + (AMMAX__DB_DU GPR:$rk, GPR:$rj)>; def : Pat<(atomic_load_min_32 GPR:$rj, GPR:$rk), - (AMMIN_DB_W GPR:$rk, GPR:$rj)>; + (AMMIN__DB_W GPR:$rk, GPR:$rj)>; def : Pat<(atomic_load_min_64 GPR:$rj, GPR:$rk), - (AMMIN_DB_D GPR:$rk, GPR:$rj)>; + (AMMIN__DB_D GPR:$rk, GPR:$rj)>; def : Pat<(atomic_load_max_32 GPR:$rj, GPR:$rk), - (AMMAX_DB_W GPR:$rk, GPR:$rj)>; + (AMMAX__DB_W GPR:$rk, GPR:$rj)>; def : Pat<(atomic_load_max_64 GPR:$rj, GPR:$rk), - (AMMAX_DB_D GPR:$rk, GPR:$rj)>; + (AMMAX__DB_D GPR:$rk, GPR:$rj)>; def : AtomicPat; @@ -1569,9 +1730,9 @@ defm : PseudoBinPat<"atomic_load_xor_32", PseudoAtomicLoadXor32>; /// Intrinsics def : 
Pat<(int_loongarch_cacop_d timm:$op, i64:$rj, timm:$imm12), - (CACOP uimm5:$op, GPR:$rj, simm12:$imm12)>; + (CACOP timm:$op, GPR:$rj, timm:$imm12)>; def : Pat<(int_loongarch_cacop_w i32:$op, i32:$rj, i32:$imm12), - (CACOP uimm5:$op, GPR:$rj, simm12:$imm12)>; + (CACOP timm:$op, GPR:$rj, timm:$imm12)>; def : Pat<(loongarch_dbar uimm15:$imm15), (DBAR uimm15:$imm15)>; def : Pat<(loongarch_ibar uimm15:$imm15), (IBAR uimm15:$imm15)>; def : Pat<(loongarch_break uimm15:$imm15), (BREAK uimm15:$imm15)>; @@ -1660,48 +1821,52 @@ let Predicates = [HasBasicF], usesCustomInserter = 1 in { //===----------------------------------------------------------------------===// // CSR Access Instructions +let hasSideEffects = 1 in def CSRRD : FmtCSR<0b0000010000000, (outs GPR:$rd), (ins uimm14:$csr_num), - "csrrd", "$rd, $csr_num">; -let Constraints = "$rd = $dst" in { + "$rd, $csr_num">; +let hasSideEffects = 1, Constraints = "$rd = $dst" in { def CSRWR : FmtCSR<0b0000010000001, (outs GPR:$dst), - (ins GPR:$rd, uimm14:$csr_num), "csrwr", "$rd, $csr_num">; + (ins GPR:$rd, uimm14:$csr_num), "$rd, $csr_num">; def CSRXCHG : FmtCSRXCHG<0b00000100, (outs GPR:$dst), (ins GPR:$rd, GPR:$rj, uimm14:$csr_num), - "csrxchg", "$rd, $rj, $csr_num">; -} // Constraints = "$rd = $dst" + "$rd, $rj, $csr_num">; +} // hasSideEffects = 1, Constraints = "$rd = $dst" // IOCSR Access Instructions -def IOCSRRD_B : IOCSRRD<0b0000011001001000000000, "iocsrrd.b">; -def IOCSRRD_H : IOCSRRD<0b0000011001001000000001, "iocsrrd.h">; -def IOCSRRD_W : IOCSRRD<0b0000011001001000000010, "iocsrrd.w">; -def IOCSRWR_B : IOCSRWR<0b0000011001001000000100, "iocsrwr.b">; -def IOCSRWR_H : IOCSRWR<0b0000011001001000000101, "iocsrwr.h">; -def IOCSRWR_W : IOCSRWR<0b0000011001001000000110, "iocsrwr.w">; +def IOCSRRD_B : IOCSRRD<0b0000011001001000000000>; +def IOCSRRD_H : IOCSRRD<0b0000011001001000000001>; +def IOCSRRD_W : IOCSRRD<0b0000011001001000000010>; +def IOCSRWR_B : IOCSRWR<0b0000011001001000000100>; +def IOCSRWR_H : IOCSRWR<0b0000011001001000000101>; +def IOCSRWR_W : IOCSRWR<0b0000011001001000000110>; let Predicates = [IsLA64] in { -def IOCSRRD_D : IOCSRRD<0b0000011001001000000011, "iocsrrd.d">; -def IOCSRWR_D : IOCSRWR<0b0000011001001000000111, "iocsrwr.d">; +def IOCSRRD_D : IOCSRRD<0b0000011001001000000011>; +def IOCSRWR_D : IOCSRWR<0b0000011001001000000111>; } // Predicates = [IsLA64] // TLB Maintenance Instructions -def TLBSRCH : FmtI32<0b00000110010010000010100000000000, "tlbsrch">; -def TLBRD : FmtI32<0b00000110010010000010110000000000, "tlbrd">; -def TLBWR : FmtI32<0b00000110010010000011000000000000, "tlbwr">; -def TLBFILL : FmtI32<0b00000110010010000011010000000000, "tlbfill">; -def TLBCLR : FmtI32<0b00000110010010000010000000000000, "tlbclr">; -def TLBFLUSH : FmtI32<0b00000110010010000010010000000000, "tlbflush">; -def INVTLB : FmtINVTLB<(outs), (ins GPR:$rk, GPR:$rj, uimm5:$op), "invtlb", +let hasSideEffects = 1, mayLoad = 0, mayStore = 0 in { +def TLBSRCH : FmtI32<0b00000110010010000010100000000000>; +def TLBRD : FmtI32<0b00000110010010000010110000000000>; +def TLBWR : FmtI32<0b00000110010010000011000000000000>; +def TLBFILL : FmtI32<0b00000110010010000011010000000000>; +def TLBCLR : FmtI32<0b00000110010010000010000000000000>; +def TLBFLUSH : FmtI32<0b00000110010010000010010000000000>; +def INVTLB : FmtINVTLB<(outs), (ins GPR:$rk, GPR:$rj, uimm5:$op), "$op, $rj, $rk">; +} // hasSideEffects = 1, mayLoad = 0, mayStore = 0 // Software Page Walking Instructions def LDDIR : Fmt2RI8<0b00000110010000, (outs GPR:$rd), - (ins GPR:$rj, uimm8:$imm8), 
"lddir", "$rd, $rj, $imm8">; -def LDPTE : FmtLDPTE<(outs), (ins GPR:$rj, uimm8:$seq), "ldpte", "$rj, $seq">; + (ins GPR:$rj, uimm8:$imm8), "$rd, $rj, $imm8">; +def LDPTE : FmtLDPTE<(outs), (ins GPR:$rj, uimm8:$seq), "$rj, $seq">; // Other Miscellaneous Instructions -def ERTN : FmtI32<0b00000110010010000011100000000000, "ertn">; -def DBCL : MISC_I15<0b00000000001010101, "dbcl">; -def IDLE : MISC_I15<0b00000110010010001, "idle">; +let hasSideEffects = 1, mayLoad = 0, mayStore = 0 in +def ERTN : FmtI32<0b00000110010010000011100000000000>; +def DBCL : MISC_I15<0b00000000001010101>; +def IDLE : MISC_I15<0b00000110010010001>; //===----------------------------------------------------------------------===// // Privilege Intrinsics @@ -1731,7 +1896,17 @@ def : Pat<(int_loongarch_asrtle_d GPR:$rj, GPR:$rk), def : Pat<(int_loongarch_asrtgt_d GPR:$rj, GPR:$rk), (ASRTGT_D GPR:$rj, GPR:$rk)>; def : Pat<(int_loongarch_lddir_d GPR:$rj, timm:$imm8), - (LDDIR GPR:$rj, uimm8:$imm8)>; + (LDDIR GPR:$rj, timm:$imm8)>; def : Pat<(int_loongarch_ldpte_d GPR:$rj, timm:$imm8), - (LDPTE GPR:$rj, uimm8:$imm8)>; + (LDPTE GPR:$rj, timm:$imm8)>; } // Predicates = [IsLA64] + +//===----------------------------------------------------------------------===// +// LSX Instructions +//===----------------------------------------------------------------------===// +include "LoongArchLSXInstrInfo.td" + +//===----------------------------------------------------------------------===// +// LASX Instructions +//===----------------------------------------------------------------------===// +include "LoongArchLASXInstrInfo.td" diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrFormats.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrFormats.td new file mode 100644 index 0000000000000000000000000000000000000000..ba21d68b9304ce9b0a5394bb29595ac5e6909975 --- /dev/null +++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrFormats.td @@ -0,0 +1,459 @@ +// LoongArchLASXInstrFormats.td - LoongArch LASX Instr Formats - tablegen -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Describe LoongArch LASX instructions format +// +// opcode - operation code. +// xd/rd/cd - destination register operand. +// {r/x}{j/k} - source register operand. +// immN - immediate data operand. 
+// +//===----------------------------------------------------------------------===// + +// 1RI13-type +// +class Fmt1RI13_XI op, dag outs, dag ins, string opnstr, + list pattern = []> + : LAInst.ret, opnstr, pattern> { + bits<13> imm13; + bits<5> xd; + + let Inst{31-0} = op; + let Inst{17-5} = imm13; + let Inst{4-0} = xd; +} + +// 2R-type +// +class Fmt2R_XX op, dag outs, dag ins, string opnstr, + list pattern = []> + : LAInst.ret, opnstr, pattern> { + bits<5> xj; + bits<5> xd; + + let Inst{31-0} = op; + let Inst{9-5} = xj; + let Inst{4-0} = xd; +} + +// +class Fmt2R_XR op, dag outs, dag ins, string opnstr, + list pattern = []> + : LAInst.ret, opnstr, pattern> { + bits<5> rj; + bits<5> xd; + + let Inst{31-0} = op; + let Inst{9-5} = rj; + let Inst{4-0} = xd; +} + +// +class Fmt2R_CX op, dag outs, dag ins, string opnstr, + list pattern = []> + : LAInst.ret, opnstr, pattern> { + bits<5> xj; + bits<3> cd; + + let Inst{31-0} = op; + let Inst{9-5} = xj; + let Inst{2-0} = cd; +} + +// 2RI1-type +// +class Fmt2RI1_XXI op, dag outs, dag ins, string opnstr, + list pattern = []> + : LAInst.ret, opnstr, pattern> { + bits<1> imm1; + bits<5> xj; + bits<5> xd; + + let Inst{31-0} = op; + let Inst{10} = imm1; + let Inst{9-5} = xj; + let Inst{4-0} = xd; +} + +// 2RI2-type +// +class Fmt2RI2_XXI op, dag outs, dag ins, string opnstr, + list pattern = []> + : LAInst.ret, opnstr, pattern> { + bits<2> imm2; + bits<5> xj; + bits<5> xd; + + let Inst{31-0} = op; + let Inst{11-10} = imm2; + let Inst{9-5} = xj; + let Inst{4-0} = xd; +} + +// +class Fmt2RI2_XRI op, dag outs, dag ins, string opnstr, + list pattern = []> + : LAInst.ret, opnstr, pattern> { + bits<2> imm2; + bits<5> rj; + bits<5> xd; + + let Inst{31-0} = op; + let Inst{11-10} = imm2; + let Inst{9-5} = rj; + let Inst{4-0} = xd; +} + +// +class Fmt2RI2_RXI op, dag outs, dag ins, string opnstr, + list pattern = []> + : LAInst.ret, opnstr, pattern> { + bits<2> imm2; + bits<5> xj; + bits<5> rd; + + let Inst{31-0} = op; + let Inst{11-10} = imm2; + let Inst{9-5} = xj; + let Inst{4-0} = rd; +} + +// 2RI3-type +// +class Fmt2RI3_XXI op, dag outs, dag ins, string opnstr, + list pattern = []> + : LAInst.ret, opnstr, pattern> { + bits<3> imm3; + bits<5> xj; + bits<5> xd; + + let Inst{31-0} = op; + let Inst{12-10} = imm3; + let Inst{9-5} = xj; + let Inst{4-0} = xd; +} + +// +class Fmt2RI3_XRI op, dag outs, dag ins, string opnstr, + list pattern = []> + : LAInst.ret, opnstr, pattern> { + bits<3> imm3; + bits<5> rj; + bits<5> xd; + + let Inst{31-0} = op; + let Inst{12-10} = imm3; + let Inst{9-5} = rj; + let Inst{4-0} = xd; +} + +// +class Fmt2RI3_RXI op, dag outs, dag ins, string opnstr, + list pattern = []> + : LAInst.ret, opnstr, pattern> { + bits<3> imm3; + bits<5> xj; + bits<5> rd; + + let Inst{31-0} = op; + let Inst{12-10} = imm3; + let Inst{9-5} = xj; + let Inst{4-0} = rd; +} + +// 2RI4-type +// +class Fmt2RI4_XXI op, dag outs, dag ins, string opnstr, + list pattern = []> + : LAInst.ret, opnstr, pattern> { + bits<4> imm4; + bits<5> xj; + bits<5> xd; + + let Inst{31-0} = op; + let Inst{13-10} = imm4; + let Inst{9-5} = xj; + let Inst{4-0} = xd; +} + +// +class Fmt2RI4_XRI op, dag outs, dag ins, string opnstr, + list pattern = []> + : LAInst.ret, opnstr, pattern> { + bits<4> imm4; + bits<5> rj; + bits<5> xd; + + let Inst{31-0} = op; + let Inst{13-10} = imm4; + let Inst{9-5} = rj; + let Inst{4-0} = xd; +} + +// +class Fmt2RI4_RXI op, dag outs, dag ins, string opnstr, + list pattern = []> + : LAInst.ret, opnstr, pattern> { + bits<4> imm4; + bits<5> xj; + bits<5> rd; + 
+ let Inst{31-0} = op; + let Inst{13-10} = imm4; + let Inst{9-5} = xj; + let Inst{4-0} = rd; +} + +// 2RI5-type +// +class Fmt2RI5_XXI op, dag outs, dag ins, string opnstr, + list pattern = []> + : LAInst.ret, opnstr, pattern> { + bits<5> imm5; + bits<5> xj; + bits<5> xd; + + let Inst{31-0} = op; + let Inst{14-10} = imm5; + let Inst{9-5} = xj; + let Inst{4-0} = xd; +} + +// 2RI6-type +// +class Fmt2RI6_XXI op, dag outs, dag ins, string opnstr, + list pattern = []> + : LAInst.ret, opnstr, pattern> { + bits<6> imm6; + bits<5> xj; + bits<5> xd; + + let Inst{31-0} = op; + let Inst{15-10} = imm6; + let Inst{9-5} = xj; + let Inst{4-0} = xd; +} + +// 2RI7-type +// +class Fmt2RI7_XXI op, dag outs, dag ins, string opnstr, + list pattern = []> + : LAInst.ret, opnstr, pattern> { + bits<7> imm7; + bits<5> xj; + bits<5> xd; + + let Inst{31-0} = op; + let Inst{16-10} = imm7; + let Inst{9-5} = xj; + let Inst{4-0} = xd; +} + +// 2RI8-type +// +class Fmt2RI8_XXI op, dag outs, dag ins, string opnstr, + list pattern = []> + : LAInst.ret, opnstr, pattern> { + bits<8> imm8; + bits<5> xj; + bits<5> xd; + + let Inst{31-0} = op; + let Inst{17-10} = imm8; + let Inst{9-5} = xj; + let Inst{4-0} = xd; +} + +// 2RI8I2-type +// +class Fmt2RI8I2_XRII op, dag outs, dag ins, string opnstr, + list pattern = []> + : LAInst.ret, opnstr, pattern> { + bits<2> imm2; + bits<8> imm8; + bits<5> rj; + bits<5> xd; + + let Inst{31-0} = op; + let Inst{19-18} = imm2; + let Inst{17-10} = imm8; + let Inst{9-5} = rj; + let Inst{4-0} = xd; +} + +// 2RI8I3-type +// +class Fmt2RI8I3_XRII op, dag outs, dag ins, string opnstr, + list pattern = []> + : LAInst.ret, opnstr, pattern> { + bits<3> imm3; + bits<8> imm8; + bits<5> rj; + bits<5> xd; + + let Inst{31-0} = op; + let Inst{20-18} = imm3; + let Inst{17-10} = imm8; + let Inst{9-5} = rj; + let Inst{4-0} = xd; +} + +// 2RI8I4-type +// +class Fmt2RI8I4_XRII op, dag outs, dag ins, string opnstr, + list pattern = []> + : LAInst.ret, opnstr, pattern> { + bits<4> imm4; + bits<8> imm8; + bits<5> rj; + bits<5> xd; + + let Inst{31-0} = op; + let Inst{21-18} = imm4; + let Inst{17-10} = imm8; + let Inst{9-5} = rj; + let Inst{4-0} = xd; +} + +// 2RI8I5-type +// +class Fmt2RI8I5_XRII op, dag outs, dag ins, string opnstr, + list pattern = []> + : LAInst.ret, opnstr, pattern> { + bits<5> imm5; + bits<8> imm8; + bits<5> rj; + bits<5> xd; + + let Inst{31-0} = op; + let Inst{22-18} = imm5; + let Inst{17-10} = imm8; + let Inst{9-5} = rj; + let Inst{4-0} = xd; +} + +// 2RI9-type +// +class Fmt2RI9_XRI op, dag outs, dag ins, string opnstr, + list pattern = []> + : LAInst.ret, opnstr, pattern> { + bits<9> imm9; + bits<5> rj; + bits<5> xd; + + let Inst{31-0} = op; + let Inst{18-10} = imm9; + let Inst{9-5} = rj; + let Inst{4-0} = xd; +} + +// 2RI10-type +// +class Fmt2RI10_XRI op, dag outs, dag ins, string opnstr, + list pattern = []> + : LAInst.ret, opnstr, pattern> { + bits<10> imm10; + bits<5> rj; + bits<5> xd; + + let Inst{31-0} = op; + let Inst{19-10} = imm10; + let Inst{9-5} = rj; + let Inst{4-0} = xd; +} + +// 2RI11-type +// +class Fmt2RI11_XRI op, dag outs, dag ins, string opnstr, + list pattern = []> + : LAInst.ret, opnstr, pattern> { + bits<11> imm11; + bits<5> rj; + bits<5> xd; + + let Inst{31-0} = op; + let Inst{20-10} = imm11; + let Inst{9-5} = rj; + let Inst{4-0} = xd; +} + +// 2RI12-type +// +class Fmt2RI12_XRI op, dag outs, dag ins, string opnstr, + list pattern = []> + : LAInst.ret, opnstr, pattern> { + bits<12> imm12; + bits<5> rj; + bits<5> xd; + + let Inst{31-0} = op; + let Inst{21-10} = imm12; + 
let Inst{9-5} = rj; + let Inst{4-0} = xd; +} + +// 3R-type +// +class Fmt3R_XXX op, dag outs, dag ins, string opnstr, + list pattern = []> + : LAInst.ret, opnstr, pattern> { + bits<5> xk; + bits<5> xj; + bits<5> xd; + + let Inst{31-0} = op; + let Inst{14-10} = xk; + let Inst{9-5} = xj; + let Inst{4-0} = xd; +} + +// +class Fmt3R_XXR op, dag outs, dag ins, string opnstr, + list pattern = []> + : LAInst.ret, opnstr, pattern> { + bits<5> rk; + bits<5> xj; + bits<5> xd; + + let Inst{31-0} = op; + let Inst{14-10} = rk; + let Inst{9-5} = xj; + let Inst{4-0} = xd; +} + +// +class Fmt3R_XRR op, dag outs, dag ins, string opnstr, + list pattern = []> + : LAInst.ret, opnstr, pattern> { + bits<5> rk; + bits<5> rj; + bits<5> xd; + + let Inst{31-0} = op; + let Inst{14-10} = rk; + let Inst{9-5} = rj; + let Inst{4-0} = xd; +} + +// 4R-type +// +class Fmt4R_XXXX op, dag outs, dag ins, string opnstr, + list pattern = []> + : LAInst.ret, opnstr, pattern> { + bits<5> xa; + bits<5> xk; + bits<5> xj; + bits<5> xd; + + let Inst{31-0} = op; + let Inst{19-15} = xa; + let Inst{14-10} = xk; + let Inst{9-5} = xj; + let Inst{4-0} = xd; +} diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td new file mode 100644 index 0000000000000000000000000000000000000000..5b6721cdf1b42035ec5f1f7964d5ed15938221cf --- /dev/null +++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td @@ -0,0 +1,2118 @@ +//=- LoongArchLASXInstrInfo.td - LoongArch LASX instructions -*- tablegen -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file describes the Advanced SIMD extension instructions. 
+// +//===----------------------------------------------------------------------===// + +def loongarch_xvpermi: SDNode<"LoongArchISD::XVPERMI", SDT_loongArchV1RUimm>; + +def lasxsplati8 + : PatFrag<(ops node:$e0), + (v32i8 (build_vector node:$e0, node:$e0, node:$e0, node:$e0, + node:$e0, node:$e0, node:$e0, node:$e0, + node:$e0, node:$e0, node:$e0, node:$e0, + node:$e0, node:$e0, node:$e0, node:$e0, + node:$e0, node:$e0, node:$e0, node:$e0, + node:$e0, node:$e0, node:$e0, node:$e0, + node:$e0, node:$e0, node:$e0, node:$e0, + node:$e0, node:$e0, node:$e0, node:$e0))>; +def lasxsplati16 + : PatFrag<(ops node:$e0), + (v16i16 (build_vector node:$e0, node:$e0, node:$e0, node:$e0, + node:$e0, node:$e0, node:$e0, node:$e0, + node:$e0, node:$e0, node:$e0, node:$e0, + node:$e0, node:$e0, node:$e0, node:$e0))>; +def lasxsplati32 + : PatFrag<(ops node:$e0), + (v8i32 (build_vector node:$e0, node:$e0, node:$e0, node:$e0, + node:$e0, node:$e0, node:$e0, node:$e0))>; +def lasxsplati64 + : PatFrag<(ops node:$e0), + (v4i64 (build_vector node:$e0, node:$e0, node:$e0, node:$e0))>; +def lasxsplatf32 + : PatFrag<(ops node:$e0), + (v8f32 (build_vector node:$e0, node:$e0, node:$e0, node:$e0, + node:$e0, node:$e0, node:$e0, node:$e0))>; +def lasxsplatf64 + : PatFrag<(ops node:$e0), + (v4f64 (build_vector node:$e0, node:$e0, node:$e0, node:$e0))>; + +//===----------------------------------------------------------------------===// +// Instruction class templates +//===----------------------------------------------------------------------===// + +class LASX1RI13_XI op, Operand ImmOpnd = simm13> + : Fmt1RI13_XI; + +class LASX2R_XX op> + : Fmt2R_XX; + +class LASX2R_XR op> + : Fmt2R_XR; + +class LASX2R_CX op> + : Fmt2R_CX; + +class LASX2RI1_XXI op, Operand ImmOpnd = uimm1> + : Fmt2RI1_XXI; + +class LASX2RI2_XXI op, Operand ImmOpnd = uimm2> + : Fmt2RI2_XXI; + +class LASX2RI2_RXI op, Operand ImmOpnd = uimm2> + : Fmt2RI2_RXI; + +class LASX2RI3_XXI op, Operand ImmOpnd = uimm3> + : Fmt2RI3_XXI; + +class LASX2RI3_RXI op, Operand ImmOpnd = uimm3> + : Fmt2RI3_RXI; + +class LASX2RI4_XXI op, Operand ImmOpnd = uimm4> + : Fmt2RI4_XXI; + +class LASX2RI4_XRI op, Operand ImmOpnd = uimm4> + : Fmt2RI4_XRI; + +class LASX2RI4_RXI op, Operand ImmOpnd = uimm4> + : Fmt2RI4_RXI; + +class LASX2RI5_XXI op, Operand ImmOpnd = uimm5> + : Fmt2RI5_XXI; + +class LASX2RI6_XXI op, Operand ImmOpnd = uimm6> + : Fmt2RI6_XXI; + +class LASX2RI8_XXI op, Operand ImmOpnd = uimm8> + : Fmt2RI8_XXI; + +class LASX2RI8I2_XRII op, Operand ImmOpnd = simm8, + Operand IdxOpnd = uimm2> + : Fmt2RI8I2_XRII; +class LASX2RI8I3_XRII op, Operand ImmOpnd = simm8, + Operand IdxOpnd = uimm3> + : Fmt2RI8I3_XRII; +class LASX2RI8I4_XRII op, Operand ImmOpnd = simm8, + Operand IdxOpnd = uimm4> + : Fmt2RI8I4_XRII; +class LASX2RI8I5_XRII op, Operand ImmOpnd = simm8, + Operand IdxOpnd = uimm5> + : Fmt2RI8I5_XRII; + +class LASX3R_XXX op> + : Fmt3R_XXX; + +class LASX3R_XXR op> + : Fmt3R_XXR; + +class LASX4R_XXXX op> + : Fmt4R_XXXX; + +let Constraints = "$xd = $dst" in { + +class LASX2RI2_XXXI op, Operand ImmOpnd = uimm2> + : Fmt2RI2_XXI; +class LASX2RI3_XXXI op, Operand ImmOpnd = uimm3> + : Fmt2RI3_XXI; + +class LASX2RI2_XXRI op, Operand ImmOpnd = uimm2> + : Fmt2RI2_XRI; +class LASX2RI3_XXRI op, Operand ImmOpnd = uimm3> + : Fmt2RI3_XRI; + +class LASX2RI4_XXXI op, Operand ImmOpnd = uimm4> + : Fmt2RI4_XXI; +class LASX2RI5_XXXI op, Operand ImmOpnd = uimm5> + : Fmt2RI5_XXI; +class LASX2RI6_XXXI op, Operand ImmOpnd = uimm6> + : Fmt2RI6_XXI; +class LASX2RI7_XXXI op, Operand ImmOpnd = uimm7> + 
: Fmt2RI7_XXI; + +class LASX2RI8_XXXI op, Operand ImmOpnd = uimm8> + : Fmt2RI8_XXI; + +class LASX3R_XXXX op> + : Fmt3R_XXX; + +} // Constraints = "$xd = $dst" + +class LASX2RI9_Load op, Operand ImmOpnd = simm9_lsl3> + : Fmt2RI9_XRI; +class LASX2RI10_Load op, Operand ImmOpnd = simm10_lsl2> + : Fmt2RI10_XRI; +class LASX2RI11_Load op, Operand ImmOpnd = simm11_lsl1> + : Fmt2RI11_XRI; +class LASX2RI12_Load op, Operand ImmOpnd = simm12> + : Fmt2RI12_XRI; +class LASX2RI12_Store op, Operand ImmOpnd = simm12> + : Fmt2RI12_XRI; + +class LASX3R_Load op> + : Fmt3R_XRR; +class LASX3R_Store op> + : Fmt3R_XRR; + +//===----------------------------------------------------------------------===// +// Instructions +//===----------------------------------------------------------------------===// + +let hasSideEffects = 0, Predicates = [HasExtLASX] in { + +let mayLoad = 0, mayStore = 0 in { +def XVADD_B : LASX3R_XXX<0x740a0000>; +def XVADD_H : LASX3R_XXX<0x740a8000>; +def XVADD_W : LASX3R_XXX<0x740b0000>; +def XVADD_D : LASX3R_XXX<0x740b8000>; +def XVADD_Q : LASX3R_XXX<0x752d0000>; + +def XVSUB_B : LASX3R_XXX<0x740c0000>; +def XVSUB_H : LASX3R_XXX<0x740c8000>; +def XVSUB_W : LASX3R_XXX<0x740d0000>; +def XVSUB_D : LASX3R_XXX<0x740d8000>; +def XVSUB_Q : LASX3R_XXX<0x752d8000>; + +def XVADDI_BU : LASX2RI5_XXI<0x768a0000>; +def XVADDI_HU : LASX2RI5_XXI<0x768a8000>; +def XVADDI_WU : LASX2RI5_XXI<0x768b0000>; +def XVADDI_DU : LASX2RI5_XXI<0x768b8000>; + +def XVSUBI_BU : LASX2RI5_XXI<0x768c0000>; +def XVSUBI_HU : LASX2RI5_XXI<0x768c8000>; +def XVSUBI_WU : LASX2RI5_XXI<0x768d0000>; +def XVSUBI_DU : LASX2RI5_XXI<0x768d8000>; + +def XVNEG_B : LASX2R_XX<0x769c3000>; +def XVNEG_H : LASX2R_XX<0x769c3400>; +def XVNEG_W : LASX2R_XX<0x769c3800>; +def XVNEG_D : LASX2R_XX<0x769c3c00>; + +def XVSADD_B : LASX3R_XXX<0x74460000>; +def XVSADD_H : LASX3R_XXX<0x74468000>; +def XVSADD_W : LASX3R_XXX<0x74470000>; +def XVSADD_D : LASX3R_XXX<0x74478000>; +def XVSADD_BU : LASX3R_XXX<0x744a0000>; +def XVSADD_HU : LASX3R_XXX<0x744a8000>; +def XVSADD_WU : LASX3R_XXX<0x744b0000>; +def XVSADD_DU : LASX3R_XXX<0x744b8000>; + +def XVSSUB_B : LASX3R_XXX<0x74480000>; +def XVSSUB_H : LASX3R_XXX<0x74488000>; +def XVSSUB_W : LASX3R_XXX<0x74490000>; +def XVSSUB_D : LASX3R_XXX<0x74498000>; +def XVSSUB_BU : LASX3R_XXX<0x744c0000>; +def XVSSUB_HU : LASX3R_XXX<0x744c8000>; +def XVSSUB_WU : LASX3R_XXX<0x744d0000>; +def XVSSUB_DU : LASX3R_XXX<0x744d8000>; + +def XVHADDW_H_B : LASX3R_XXX<0x74540000>; +def XVHADDW_W_H : LASX3R_XXX<0x74548000>; +def XVHADDW_D_W : LASX3R_XXX<0x74550000>; +def XVHADDW_Q_D : LASX3R_XXX<0x74558000>; +def XVHADDW_HU_BU : LASX3R_XXX<0x74580000>; +def XVHADDW_WU_HU : LASX3R_XXX<0x74588000>; +def XVHADDW_DU_WU : LASX3R_XXX<0x74590000>; +def XVHADDW_QU_DU : LASX3R_XXX<0x74598000>; + +def XVHSUBW_H_B : LASX3R_XXX<0x74560000>; +def XVHSUBW_W_H : LASX3R_XXX<0x74568000>; +def XVHSUBW_D_W : LASX3R_XXX<0x74570000>; +def XVHSUBW_Q_D : LASX3R_XXX<0x74578000>; +def XVHSUBW_HU_BU : LASX3R_XXX<0x745a0000>; +def XVHSUBW_WU_HU : LASX3R_XXX<0x745a8000>; +def XVHSUBW_DU_WU : LASX3R_XXX<0x745b0000>; +def XVHSUBW_QU_DU : LASX3R_XXX<0x745b8000>; + +def XVADDWEV_H_B : LASX3R_XXX<0x741e0000>; +def XVADDWEV_W_H : LASX3R_XXX<0x741e8000>; +def XVADDWEV_D_W : LASX3R_XXX<0x741f0000>; +def XVADDWEV_Q_D : LASX3R_XXX<0x741f8000>; +def XVADDWOD_H_B : LASX3R_XXX<0x74220000>; +def XVADDWOD_W_H : LASX3R_XXX<0x74228000>; +def XVADDWOD_D_W : LASX3R_XXX<0x74230000>; +def XVADDWOD_Q_D : LASX3R_XXX<0x74238000>; + +def XVSUBWEV_H_B : LASX3R_XXX<0x74200000>; +def XVSUBWEV_W_H 
: LASX3R_XXX<0x74208000>; +def XVSUBWEV_D_W : LASX3R_XXX<0x74210000>; +def XVSUBWEV_Q_D : LASX3R_XXX<0x74218000>; +def XVSUBWOD_H_B : LASX3R_XXX<0x74240000>; +def XVSUBWOD_W_H : LASX3R_XXX<0x74248000>; +def XVSUBWOD_D_W : LASX3R_XXX<0x74250000>; +def XVSUBWOD_Q_D : LASX3R_XXX<0x74258000>; + +def XVADDWEV_H_BU : LASX3R_XXX<0x742e0000>; +def XVADDWEV_W_HU : LASX3R_XXX<0x742e8000>; +def XVADDWEV_D_WU : LASX3R_XXX<0x742f0000>; +def XVADDWEV_Q_DU : LASX3R_XXX<0x742f8000>; +def XVADDWOD_H_BU : LASX3R_XXX<0x74320000>; +def XVADDWOD_W_HU : LASX3R_XXX<0x74328000>; +def XVADDWOD_D_WU : LASX3R_XXX<0x74330000>; +def XVADDWOD_Q_DU : LASX3R_XXX<0x74338000>; + +def XVSUBWEV_H_BU : LASX3R_XXX<0x74300000>; +def XVSUBWEV_W_HU : LASX3R_XXX<0x74308000>; +def XVSUBWEV_D_WU : LASX3R_XXX<0x74310000>; +def XVSUBWEV_Q_DU : LASX3R_XXX<0x74318000>; +def XVSUBWOD_H_BU : LASX3R_XXX<0x74340000>; +def XVSUBWOD_W_HU : LASX3R_XXX<0x74348000>; +def XVSUBWOD_D_WU : LASX3R_XXX<0x74350000>; +def XVSUBWOD_Q_DU : LASX3R_XXX<0x74358000>; + +def XVADDWEV_H_BU_B : LASX3R_XXX<0x743e0000>; +def XVADDWEV_W_HU_H : LASX3R_XXX<0x743e8000>; +def XVADDWEV_D_WU_W : LASX3R_XXX<0x743f0000>; +def XVADDWEV_Q_DU_D : LASX3R_XXX<0x743f8000>; +def XVADDWOD_H_BU_B : LASX3R_XXX<0x74400000>; +def XVADDWOD_W_HU_H : LASX3R_XXX<0x74408000>; +def XVADDWOD_D_WU_W : LASX3R_XXX<0x74410000>; +def XVADDWOD_Q_DU_D : LASX3R_XXX<0x74418000>; + +def XVAVG_B : LASX3R_XXX<0x74640000>; +def XVAVG_H : LASX3R_XXX<0x74648000>; +def XVAVG_W : LASX3R_XXX<0x74650000>; +def XVAVG_D : LASX3R_XXX<0x74658000>; +def XVAVG_BU : LASX3R_XXX<0x74660000>; +def XVAVG_HU : LASX3R_XXX<0x74668000>; +def XVAVG_WU : LASX3R_XXX<0x74670000>; +def XVAVG_DU : LASX3R_XXX<0x74678000>; +def XVAVGR_B : LASX3R_XXX<0x74680000>; +def XVAVGR_H : LASX3R_XXX<0x74688000>; +def XVAVGR_W : LASX3R_XXX<0x74690000>; +def XVAVGR_D : LASX3R_XXX<0x74698000>; +def XVAVGR_BU : LASX3R_XXX<0x746a0000>; +def XVAVGR_HU : LASX3R_XXX<0x746a8000>; +def XVAVGR_WU : LASX3R_XXX<0x746b0000>; +def XVAVGR_DU : LASX3R_XXX<0x746b8000>; + +def XVABSD_B : LASX3R_XXX<0x74600000>; +def XVABSD_H : LASX3R_XXX<0x74608000>; +def XVABSD_W : LASX3R_XXX<0x74610000>; +def XVABSD_D : LASX3R_XXX<0x74618000>; +def XVABSD_BU : LASX3R_XXX<0x74620000>; +def XVABSD_HU : LASX3R_XXX<0x74628000>; +def XVABSD_WU : LASX3R_XXX<0x74630000>; +def XVABSD_DU : LASX3R_XXX<0x74638000>; + +def XVADDA_B : LASX3R_XXX<0x745c0000>; +def XVADDA_H : LASX3R_XXX<0x745c8000>; +def XVADDA_W : LASX3R_XXX<0x745d0000>; +def XVADDA_D : LASX3R_XXX<0x745d8000>; + +def XVMAX_B : LASX3R_XXX<0x74700000>; +def XVMAX_H : LASX3R_XXX<0x74708000>; +def XVMAX_W : LASX3R_XXX<0x74710000>; +def XVMAX_D : LASX3R_XXX<0x74718000>; +def XVMAXI_B : LASX2RI5_XXI<0x76900000, simm5>; +def XVMAXI_H : LASX2RI5_XXI<0x76908000, simm5>; +def XVMAXI_W : LASX2RI5_XXI<0x76910000, simm5>; +def XVMAXI_D : LASX2RI5_XXI<0x76918000, simm5>; +def XVMAX_BU : LASX3R_XXX<0x74740000>; +def XVMAX_HU : LASX3R_XXX<0x74748000>; +def XVMAX_WU : LASX3R_XXX<0x74750000>; +def XVMAX_DU : LASX3R_XXX<0x74758000>; +def XVMAXI_BU : LASX2RI5_XXI<0x76940000>; +def XVMAXI_HU : LASX2RI5_XXI<0x76948000>; +def XVMAXI_WU : LASX2RI5_XXI<0x76950000>; +def XVMAXI_DU : LASX2RI5_XXI<0x76958000>; + +def XVMIN_B : LASX3R_XXX<0x74720000>; +def XVMIN_H : LASX3R_XXX<0x74728000>; +def XVMIN_W : LASX3R_XXX<0x74730000>; +def XVMIN_D : LASX3R_XXX<0x74738000>; +def XVMINI_B : LASX2RI5_XXI<0x76920000, simm5>; +def XVMINI_H : LASX2RI5_XXI<0x76928000, simm5>; +def XVMINI_W : LASX2RI5_XXI<0x76930000, simm5>; +def XVMINI_D : LASX2RI5_XXI<0x76938000, 
simm5>; +def XVMIN_BU : LASX3R_XXX<0x74760000>; +def XVMIN_HU : LASX3R_XXX<0x74768000>; +def XVMIN_WU : LASX3R_XXX<0x74770000>; +def XVMIN_DU : LASX3R_XXX<0x74778000>; +def XVMINI_BU : LASX2RI5_XXI<0x76960000>; +def XVMINI_HU : LASX2RI5_XXI<0x76968000>; +def XVMINI_WU : LASX2RI5_XXI<0x76970000>; +def XVMINI_DU : LASX2RI5_XXI<0x76978000>; + +def XVMUL_B : LASX3R_XXX<0x74840000>; +def XVMUL_H : LASX3R_XXX<0x74848000>; +def XVMUL_W : LASX3R_XXX<0x74850000>; +def XVMUL_D : LASX3R_XXX<0x74858000>; + +def XVMUH_B : LASX3R_XXX<0x74860000>; +def XVMUH_H : LASX3R_XXX<0x74868000>; +def XVMUH_W : LASX3R_XXX<0x74870000>; +def XVMUH_D : LASX3R_XXX<0x74878000>; +def XVMUH_BU : LASX3R_XXX<0x74880000>; +def XVMUH_HU : LASX3R_XXX<0x74888000>; +def XVMUH_WU : LASX3R_XXX<0x74890000>; +def XVMUH_DU : LASX3R_XXX<0x74898000>; + +def XVMULWEV_H_B : LASX3R_XXX<0x74900000>; +def XVMULWEV_W_H : LASX3R_XXX<0x74908000>; +def XVMULWEV_D_W : LASX3R_XXX<0x74910000>; +def XVMULWEV_Q_D : LASX3R_XXX<0x74918000>; +def XVMULWOD_H_B : LASX3R_XXX<0x74920000>; +def XVMULWOD_W_H : LASX3R_XXX<0x74928000>; +def XVMULWOD_D_W : LASX3R_XXX<0x74930000>; +def XVMULWOD_Q_D : LASX3R_XXX<0x74938000>; +def XVMULWEV_H_BU : LASX3R_XXX<0x74980000>; +def XVMULWEV_W_HU : LASX3R_XXX<0x74988000>; +def XVMULWEV_D_WU : LASX3R_XXX<0x74990000>; +def XVMULWEV_Q_DU : LASX3R_XXX<0x74998000>; +def XVMULWOD_H_BU : LASX3R_XXX<0x749a0000>; +def XVMULWOD_W_HU : LASX3R_XXX<0x749a8000>; +def XVMULWOD_D_WU : LASX3R_XXX<0x749b0000>; +def XVMULWOD_Q_DU : LASX3R_XXX<0x749b8000>; +def XVMULWEV_H_BU_B : LASX3R_XXX<0x74a00000>; +def XVMULWEV_W_HU_H : LASX3R_XXX<0x74a08000>; +def XVMULWEV_D_WU_W : LASX3R_XXX<0x74a10000>; +def XVMULWEV_Q_DU_D : LASX3R_XXX<0x74a18000>; +def XVMULWOD_H_BU_B : LASX3R_XXX<0x74a20000>; +def XVMULWOD_W_HU_H : LASX3R_XXX<0x74a28000>; +def XVMULWOD_D_WU_W : LASX3R_XXX<0x74a30000>; +def XVMULWOD_Q_DU_D : LASX3R_XXX<0x74a38000>; + +def XVMADD_B : LASX3R_XXXX<0x74a80000>; +def XVMADD_H : LASX3R_XXXX<0x74a88000>; +def XVMADD_W : LASX3R_XXXX<0x74a90000>; +def XVMADD_D : LASX3R_XXXX<0x74a98000>; + +def XVMSUB_B : LASX3R_XXXX<0x74aa0000>; +def XVMSUB_H : LASX3R_XXXX<0x74aa8000>; +def XVMSUB_W : LASX3R_XXXX<0x74ab0000>; +def XVMSUB_D : LASX3R_XXXX<0x74ab8000>; + +def XVMADDWEV_H_B : LASX3R_XXXX<0x74ac0000>; +def XVMADDWEV_W_H : LASX3R_XXXX<0x74ac8000>; +def XVMADDWEV_D_W : LASX3R_XXXX<0x74ad0000>; +def XVMADDWEV_Q_D : LASX3R_XXXX<0x74ad8000>; +def XVMADDWOD_H_B : LASX3R_XXXX<0x74ae0000>; +def XVMADDWOD_W_H : LASX3R_XXXX<0x74ae8000>; +def XVMADDWOD_D_W : LASX3R_XXXX<0x74af0000>; +def XVMADDWOD_Q_D : LASX3R_XXXX<0x74af8000>; +def XVMADDWEV_H_BU : LASX3R_XXXX<0x74b40000>; +def XVMADDWEV_W_HU : LASX3R_XXXX<0x74b48000>; +def XVMADDWEV_D_WU : LASX3R_XXXX<0x74b50000>; +def XVMADDWEV_Q_DU : LASX3R_XXXX<0x74b58000>; +def XVMADDWOD_H_BU : LASX3R_XXXX<0x74b60000>; +def XVMADDWOD_W_HU : LASX3R_XXXX<0x74b68000>; +def XVMADDWOD_D_WU : LASX3R_XXXX<0x74b70000>; +def XVMADDWOD_Q_DU : LASX3R_XXXX<0x74b78000>; +def XVMADDWEV_H_BU_B : LASX3R_XXXX<0x74bc0000>; +def XVMADDWEV_W_HU_H : LASX3R_XXXX<0x74bc8000>; +def XVMADDWEV_D_WU_W : LASX3R_XXXX<0x74bd0000>; +def XVMADDWEV_Q_DU_D : LASX3R_XXXX<0x74bd8000>; +def XVMADDWOD_H_BU_B : LASX3R_XXXX<0x74be0000>; +def XVMADDWOD_W_HU_H : LASX3R_XXXX<0x74be8000>; +def XVMADDWOD_D_WU_W : LASX3R_XXXX<0x74bf0000>; +def XVMADDWOD_Q_DU_D : LASX3R_XXXX<0x74bf8000>; + +def XVDIV_B : LASX3R_XXX<0x74e00000>; +def XVDIV_H : LASX3R_XXX<0x74e08000>; +def XVDIV_W : LASX3R_XXX<0x74e10000>; +def XVDIV_D : LASX3R_XXX<0x74e18000>; +def XVDIV_BU : 
LASX3R_XXX<0x74e40000>; +def XVDIV_HU : LASX3R_XXX<0x74e48000>; +def XVDIV_WU : LASX3R_XXX<0x74e50000>; +def XVDIV_DU : LASX3R_XXX<0x74e58000>; + +def XVMOD_B : LASX3R_XXX<0x74e20000>; +def XVMOD_H : LASX3R_XXX<0x74e28000>; +def XVMOD_W : LASX3R_XXX<0x74e30000>; +def XVMOD_D : LASX3R_XXX<0x74e38000>; +def XVMOD_BU : LASX3R_XXX<0x74e60000>; +def XVMOD_HU : LASX3R_XXX<0x74e68000>; +def XVMOD_WU : LASX3R_XXX<0x74e70000>; +def XVMOD_DU : LASX3R_XXX<0x74e78000>; + +def XVSAT_B : LASX2RI3_XXI<0x77242000>; +def XVSAT_H : LASX2RI4_XXI<0x77244000>; +def XVSAT_W : LASX2RI5_XXI<0x77248000>; +def XVSAT_D : LASX2RI6_XXI<0x77250000>; +def XVSAT_BU : LASX2RI3_XXI<0x77282000>; +def XVSAT_HU : LASX2RI4_XXI<0x77284000>; +def XVSAT_WU : LASX2RI5_XXI<0x77288000>; +def XVSAT_DU : LASX2RI6_XXI<0x77290000>; + +def XVEXTH_H_B : LASX2R_XX<0x769ee000>; +def XVEXTH_W_H : LASX2R_XX<0x769ee400>; +def XVEXTH_D_W : LASX2R_XX<0x769ee800>; +def XVEXTH_Q_D : LASX2R_XX<0x769eec00>; +def XVEXTH_HU_BU : LASX2R_XX<0x769ef000>; +def XVEXTH_WU_HU : LASX2R_XX<0x769ef400>; +def XVEXTH_DU_WU : LASX2R_XX<0x769ef800>; +def XVEXTH_QU_DU : LASX2R_XX<0x769efc00>; + +def VEXT2XV_H_B : LASX2R_XX<0x769f1000>; +def VEXT2XV_W_B : LASX2R_XX<0x769f1400>; +def VEXT2XV_D_B : LASX2R_XX<0x769f1800>; +def VEXT2XV_W_H : LASX2R_XX<0x769f1c00>; +def VEXT2XV_D_H : LASX2R_XX<0x769f2000>; +def VEXT2XV_D_W : LASX2R_XX<0x769f2400>; +def VEXT2XV_HU_BU : LASX2R_XX<0x769f2800>; +def VEXT2XV_WU_BU : LASX2R_XX<0x769f2c00>; +def VEXT2XV_DU_BU : LASX2R_XX<0x769f3000>; +def VEXT2XV_WU_HU : LASX2R_XX<0x769f3400>; +def VEXT2XV_DU_HU : LASX2R_XX<0x769f3800>; +def VEXT2XV_DU_WU : LASX2R_XX<0x769f3c00>; + +def XVHSELI_D : LASX2RI5_XXI<0x769f8000>; + +def XVSIGNCOV_B : LASX3R_XXX<0x752e0000>; +def XVSIGNCOV_H : LASX3R_XXX<0x752e8000>; +def XVSIGNCOV_W : LASX3R_XXX<0x752f0000>; +def XVSIGNCOV_D : LASX3R_XXX<0x752f8000>; + +def XVMSKLTZ_B : LASX2R_XX<0x769c4000>; +def XVMSKLTZ_H : LASX2R_XX<0x769c4400>; +def XVMSKLTZ_W : LASX2R_XX<0x769c4800>; +def XVMSKLTZ_D : LASX2R_XX<0x769c4c00>; + +def XVMSKGEZ_B : LASX2R_XX<0x769c5000>; + +def XVMSKNZ_B : LASX2R_XX<0x769c6000>; + +def XVLDI : LASX1RI13_XI<0x77e00000>; + +def XVAND_V : LASX3R_XXX<0x75260000>; +def XVOR_V : LASX3R_XXX<0x75268000>; +def XVXOR_V : LASX3R_XXX<0x75270000>; +def XVNOR_V : LASX3R_XXX<0x75278000>; +def XVANDN_V : LASX3R_XXX<0x75280000>; +def XVORN_V : LASX3R_XXX<0x75288000>; + +def XVANDI_B : LASX2RI8_XXI<0x77d00000>; +def XVORI_B : LASX2RI8_XXI<0x77d40000>; +def XVXORI_B : LASX2RI8_XXI<0x77d80000>; +def XVNORI_B : LASX2RI8_XXI<0x77dc0000>; + +def XVSLL_B : LASX3R_XXX<0x74e80000>; +def XVSLL_H : LASX3R_XXX<0x74e88000>; +def XVSLL_W : LASX3R_XXX<0x74e90000>; +def XVSLL_D : LASX3R_XXX<0x74e98000>; +def XVSLLI_B : LASX2RI3_XXI<0x772c2000>; +def XVSLLI_H : LASX2RI4_XXI<0x772c4000>; +def XVSLLI_W : LASX2RI5_XXI<0x772c8000>; +def XVSLLI_D : LASX2RI6_XXI<0x772d0000>; + +def XVSRL_B : LASX3R_XXX<0x74ea0000>; +def XVSRL_H : LASX3R_XXX<0x74ea8000>; +def XVSRL_W : LASX3R_XXX<0x74eb0000>; +def XVSRL_D : LASX3R_XXX<0x74eb8000>; +def XVSRLI_B : LASX2RI3_XXI<0x77302000>; +def XVSRLI_H : LASX2RI4_XXI<0x77304000>; +def XVSRLI_W : LASX2RI5_XXI<0x77308000>; +def XVSRLI_D : LASX2RI6_XXI<0x77310000>; + +def XVSRA_B : LASX3R_XXX<0x74ec0000>; +def XVSRA_H : LASX3R_XXX<0x74ec8000>; +def XVSRA_W : LASX3R_XXX<0x74ed0000>; +def XVSRA_D : LASX3R_XXX<0x74ed8000>; +def XVSRAI_B : LASX2RI3_XXI<0x77342000>; +def XVSRAI_H : LASX2RI4_XXI<0x77344000>; +def XVSRAI_W : LASX2RI5_XXI<0x77348000>; +def XVSRAI_D : LASX2RI6_XXI<0x77350000>; + +def 
XVROTR_B : LASX3R_XXX<0x74ee0000>; +def XVROTR_H : LASX3R_XXX<0x74ee8000>; +def XVROTR_W : LASX3R_XXX<0x74ef0000>; +def XVROTR_D : LASX3R_XXX<0x74ef8000>; +def XVROTRI_B : LASX2RI3_XXI<0x76a02000>; +def XVROTRI_H : LASX2RI4_XXI<0x76a04000>; +def XVROTRI_W : LASX2RI5_XXI<0x76a08000>; +def XVROTRI_D : LASX2RI6_XXI<0x76a10000>; + +def XVSLLWIL_H_B : LASX2RI3_XXI<0x77082000>; +def XVSLLWIL_W_H : LASX2RI4_XXI<0x77084000>; +def XVSLLWIL_D_W : LASX2RI5_XXI<0x77088000>; +def XVEXTL_Q_D : LASX2R_XX<0x77090000>; +def XVSLLWIL_HU_BU : LASX2RI3_XXI<0x770c2000>; +def XVSLLWIL_WU_HU : LASX2RI4_XXI<0x770c4000>; +def XVSLLWIL_DU_WU : LASX2RI5_XXI<0x770c8000>; +def XVEXTL_QU_DU : LASX2R_XX<0x770d0000>; + +def XVSRLR_B : LASX3R_XXX<0x74f00000>; +def XVSRLR_H : LASX3R_XXX<0x74f08000>; +def XVSRLR_W : LASX3R_XXX<0x74f10000>; +def XVSRLR_D : LASX3R_XXX<0x74f18000>; +def XVSRLRI_B : LASX2RI3_XXI<0x76a42000>; +def XVSRLRI_H : LASX2RI4_XXI<0x76a44000>; +def XVSRLRI_W : LASX2RI5_XXI<0x76a48000>; +def XVSRLRI_D : LASX2RI6_XXI<0x76a50000>; + +def XVSRAR_B : LASX3R_XXX<0x74f20000>; +def XVSRAR_H : LASX3R_XXX<0x74f28000>; +def XVSRAR_W : LASX3R_XXX<0x74f30000>; +def XVSRAR_D : LASX3R_XXX<0x74f38000>; +def XVSRARI_B : LASX2RI3_XXI<0x76a82000>; +def XVSRARI_H : LASX2RI4_XXI<0x76a84000>; +def XVSRARI_W : LASX2RI5_XXI<0x76a88000>; +def XVSRARI_D : LASX2RI6_XXI<0x76a90000>; + +def XVSRLN_B_H : LASX3R_XXX<0x74f48000>; +def XVSRLN_H_W : LASX3R_XXX<0x74f50000>; +def XVSRLN_W_D : LASX3R_XXX<0x74f58000>; +def XVSRAN_B_H : LASX3R_XXX<0x74f68000>; +def XVSRAN_H_W : LASX3R_XXX<0x74f70000>; +def XVSRAN_W_D : LASX3R_XXX<0x74f78000>; + +def XVSRLNI_B_H : LASX2RI4_XXXI<0x77404000>; +def XVSRLNI_H_W : LASX2RI5_XXXI<0x77408000>; +def XVSRLNI_W_D : LASX2RI6_XXXI<0x77410000>; +def XVSRLNI_D_Q : LASX2RI7_XXXI<0x77420000>; +def XVSRANI_B_H : LASX2RI4_XXXI<0x77584000>; +def XVSRANI_H_W : LASX2RI5_XXXI<0x77588000>; +def XVSRANI_W_D : LASX2RI6_XXXI<0x77590000>; +def XVSRANI_D_Q : LASX2RI7_XXXI<0x775a0000>; + +def XVSRLRN_B_H : LASX3R_XXX<0x74f88000>; +def XVSRLRN_H_W : LASX3R_XXX<0x74f90000>; +def XVSRLRN_W_D : LASX3R_XXX<0x74f98000>; +def XVSRARN_B_H : LASX3R_XXX<0x74fa8000>; +def XVSRARN_H_W : LASX3R_XXX<0x74fb0000>; +def XVSRARN_W_D : LASX3R_XXX<0x74fb8000>; + +def XVSRLRNI_B_H : LASX2RI4_XXXI<0x77444000>; +def XVSRLRNI_H_W : LASX2RI5_XXXI<0x77448000>; +def XVSRLRNI_W_D : LASX2RI6_XXXI<0x77450000>; +def XVSRLRNI_D_Q : LASX2RI7_XXXI<0x77460000>; +def XVSRARNI_B_H : LASX2RI4_XXXI<0x775c4000>; +def XVSRARNI_H_W : LASX2RI5_XXXI<0x775c8000>; +def XVSRARNI_W_D : LASX2RI6_XXXI<0x775d0000>; +def XVSRARNI_D_Q : LASX2RI7_XXXI<0x775e0000>; + +def XVSSRLN_B_H : LASX3R_XXX<0x74fc8000>; +def XVSSRLN_H_W : LASX3R_XXX<0x74fd0000>; +def XVSSRLN_W_D : LASX3R_XXX<0x74fd8000>; +def XVSSRAN_B_H : LASX3R_XXX<0x74fe8000>; +def XVSSRAN_H_W : LASX3R_XXX<0x74ff0000>; +def XVSSRAN_W_D : LASX3R_XXX<0x74ff8000>; +def XVSSRLN_BU_H : LASX3R_XXX<0x75048000>; +def XVSSRLN_HU_W : LASX3R_XXX<0x75050000>; +def XVSSRLN_WU_D : LASX3R_XXX<0x75058000>; +def XVSSRAN_BU_H : LASX3R_XXX<0x75068000>; +def XVSSRAN_HU_W : LASX3R_XXX<0x75070000>; +def XVSSRAN_WU_D : LASX3R_XXX<0x75078000>; + +def XVSSRLNI_B_H : LASX2RI4_XXXI<0x77484000>; +def XVSSRLNI_H_W : LASX2RI5_XXXI<0x77488000>; +def XVSSRLNI_W_D : LASX2RI6_XXXI<0x77490000>; +def XVSSRLNI_D_Q : LASX2RI7_XXXI<0x774a0000>; +def XVSSRANI_B_H : LASX2RI4_XXXI<0x77604000>; +def XVSSRANI_H_W : LASX2RI5_XXXI<0x77608000>; +def XVSSRANI_W_D : LASX2RI6_XXXI<0x77610000>; +def XVSSRANI_D_Q : LASX2RI7_XXXI<0x77620000>; +def XVSSRLNI_BU_H : 
LASX2RI4_XXXI<0x774c4000>; +def XVSSRLNI_HU_W : LASX2RI5_XXXI<0x774c8000>; +def XVSSRLNI_WU_D : LASX2RI6_XXXI<0x774d0000>; +def XVSSRLNI_DU_Q : LASX2RI7_XXXI<0x774e0000>; +def XVSSRANI_BU_H : LASX2RI4_XXXI<0x77644000>; +def XVSSRANI_HU_W : LASX2RI5_XXXI<0x77648000>; +def XVSSRANI_WU_D : LASX2RI6_XXXI<0x77650000>; +def XVSSRANI_DU_Q : LASX2RI7_XXXI<0x77660000>; + +def XVSSRLRN_B_H : LASX3R_XXX<0x75008000>; +def XVSSRLRN_H_W : LASX3R_XXX<0x75010000>; +def XVSSRLRN_W_D : LASX3R_XXX<0x75018000>; +def XVSSRARN_B_H : LASX3R_XXX<0x75028000>; +def XVSSRARN_H_W : LASX3R_XXX<0x75030000>; +def XVSSRARN_W_D : LASX3R_XXX<0x75038000>; +def XVSSRLRN_BU_H : LASX3R_XXX<0x75088000>; +def XVSSRLRN_HU_W : LASX3R_XXX<0x75090000>; +def XVSSRLRN_WU_D : LASX3R_XXX<0x75098000>; +def XVSSRARN_BU_H : LASX3R_XXX<0x750a8000>; +def XVSSRARN_HU_W : LASX3R_XXX<0x750b0000>; +def XVSSRARN_WU_D : LASX3R_XXX<0x750b8000>; + +def XVSSRLRNI_B_H : LASX2RI4_XXXI<0x77504000>; +def XVSSRLRNI_H_W : LASX2RI5_XXXI<0x77508000>; +def XVSSRLRNI_W_D : LASX2RI6_XXXI<0x77510000>; +def XVSSRLRNI_D_Q : LASX2RI7_XXXI<0x77520000>; +def XVSSRARNI_B_H : LASX2RI4_XXXI<0x77684000>; +def XVSSRARNI_H_W : LASX2RI5_XXXI<0x77688000>; +def XVSSRARNI_W_D : LASX2RI6_XXXI<0x77690000>; +def XVSSRARNI_D_Q : LASX2RI7_XXXI<0x776a0000>; +def XVSSRLRNI_BU_H : LASX2RI4_XXXI<0x77544000>; +def XVSSRLRNI_HU_W : LASX2RI5_XXXI<0x77548000>; +def XVSSRLRNI_WU_D : LASX2RI6_XXXI<0x77550000>; +def XVSSRLRNI_DU_Q : LASX2RI7_XXXI<0x77560000>; +def XVSSRARNI_BU_H : LASX2RI4_XXXI<0x776c4000>; +def XVSSRARNI_HU_W : LASX2RI5_XXXI<0x776c8000>; +def XVSSRARNI_WU_D : LASX2RI6_XXXI<0x776d0000>; +def XVSSRARNI_DU_Q : LASX2RI7_XXXI<0x776e0000>; + +def XVCLO_B : LASX2R_XX<0x769c0000>; +def XVCLO_H : LASX2R_XX<0x769c0400>; +def XVCLO_W : LASX2R_XX<0x769c0800>; +def XVCLO_D : LASX2R_XX<0x769c0c00>; +def XVCLZ_B : LASX2R_XX<0x769c1000>; +def XVCLZ_H : LASX2R_XX<0x769c1400>; +def XVCLZ_W : LASX2R_XX<0x769c1800>; +def XVCLZ_D : LASX2R_XX<0x769c1c00>; + +def XVPCNT_B : LASX2R_XX<0x769c2000>; +def XVPCNT_H : LASX2R_XX<0x769c2400>; +def XVPCNT_W : LASX2R_XX<0x769c2800>; +def XVPCNT_D : LASX2R_XX<0x769c2c00>; + +def XVBITCLR_B : LASX3R_XXX<0x750c0000>; +def XVBITCLR_H : LASX3R_XXX<0x750c8000>; +def XVBITCLR_W : LASX3R_XXX<0x750d0000>; +def XVBITCLR_D : LASX3R_XXX<0x750d8000>; +def XVBITCLRI_B : LASX2RI3_XXI<0x77102000>; +def XVBITCLRI_H : LASX2RI4_XXI<0x77104000>; +def XVBITCLRI_W : LASX2RI5_XXI<0x77108000>; +def XVBITCLRI_D : LASX2RI6_XXI<0x77110000>; + +def XVBITSET_B : LASX3R_XXX<0x750e0000>; +def XVBITSET_H : LASX3R_XXX<0x750e8000>; +def XVBITSET_W : LASX3R_XXX<0x750f0000>; +def XVBITSET_D : LASX3R_XXX<0x750f8000>; +def XVBITSETI_B : LASX2RI3_XXI<0x77142000>; +def XVBITSETI_H : LASX2RI4_XXI<0x77144000>; +def XVBITSETI_W : LASX2RI5_XXI<0x77148000>; +def XVBITSETI_D : LASX2RI6_XXI<0x77150000>; + +def XVBITREV_B : LASX3R_XXX<0x75100000>; +def XVBITREV_H : LASX3R_XXX<0x75108000>; +def XVBITREV_W : LASX3R_XXX<0x75110000>; +def XVBITREV_D : LASX3R_XXX<0x75118000>; +def XVBITREVI_B : LASX2RI3_XXI<0x77182000>; +def XVBITREVI_H : LASX2RI4_XXI<0x77184000>; +def XVBITREVI_W : LASX2RI5_XXI<0x77188000>; +def XVBITREVI_D : LASX2RI6_XXI<0x77190000>; + +def XVFRSTP_B : LASX3R_XXXX<0x752b0000>; +def XVFRSTP_H : LASX3R_XXXX<0x752b8000>; +def XVFRSTPI_B : LASX2RI5_XXXI<0x769a0000>; +def XVFRSTPI_H : LASX2RI5_XXXI<0x769a8000>; + +def XVFADD_S : LASX3R_XXX<0x75308000>; +def XVFADD_D : LASX3R_XXX<0x75310000>; +def XVFSUB_S : LASX3R_XXX<0x75328000>; +def XVFSUB_D : LASX3R_XXX<0x75330000>; +def XVFMUL_S : 
LASX3R_XXX<0x75388000>; +def XVFMUL_D : LASX3R_XXX<0x75390000>; +def XVFDIV_S : LASX3R_XXX<0x753a8000>; +def XVFDIV_D : LASX3R_XXX<0x753b0000>; + +def XVFMADD_S : LASX4R_XXXX<0x0a100000>; +def XVFMADD_D : LASX4R_XXXX<0x0a200000>; +def XVFMSUB_S : LASX4R_XXXX<0x0a500000>; +def XVFMSUB_D : LASX4R_XXXX<0x0a600000>; +def XVFNMADD_S : LASX4R_XXXX<0x0a900000>; +def XVFNMADD_D : LASX4R_XXXX<0x0aa00000>; +def XVFNMSUB_S : LASX4R_XXXX<0x0ad00000>; +def XVFNMSUB_D : LASX4R_XXXX<0x0ae00000>; + +def XVFMAX_S : LASX3R_XXX<0x753c8000>; +def XVFMAX_D : LASX3R_XXX<0x753d0000>; +def XVFMIN_S : LASX3R_XXX<0x753e8000>; +def XVFMIN_D : LASX3R_XXX<0x753f0000>; + +def XVFMAXA_S : LASX3R_XXX<0x75408000>; +def XVFMAXA_D : LASX3R_XXX<0x75410000>; +def XVFMINA_S : LASX3R_XXX<0x75428000>; +def XVFMINA_D : LASX3R_XXX<0x75430000>; + +def XVFLOGB_S : LASX2R_XX<0x769cc400>; +def XVFLOGB_D : LASX2R_XX<0x769cc800>; + +def XVFCLASS_S : LASX2R_XX<0x769cd400>; +def XVFCLASS_D : LASX2R_XX<0x769cd800>; + +def XVFSQRT_S : LASX2R_XX<0x769ce400>; +def XVFSQRT_D : LASX2R_XX<0x769ce800>; +def XVFRECIP_S : LASX2R_XX<0x769cf400>; +def XVFRECIP_D : LASX2R_XX<0x769cf800>; +def XVFRSQRT_S : LASX2R_XX<0x769d0400>; +def XVFRSQRT_D : LASX2R_XX<0x769d0800>; + +def XVFCVTL_S_H : LASX2R_XX<0x769de800>; +def XVFCVTH_S_H : LASX2R_XX<0x769dec00>; +def XVFCVTL_D_S : LASX2R_XX<0x769df000>; +def XVFCVTH_D_S : LASX2R_XX<0x769df400>; +def XVFCVT_H_S : LASX3R_XXX<0x75460000>; +def XVFCVT_S_D : LASX3R_XXX<0x75468000>; + +def XVFRINTRNE_S : LASX2R_XX<0x769d7400>; +def XVFRINTRNE_D : LASX2R_XX<0x769d7800>; +def XVFRINTRZ_S : LASX2R_XX<0x769d6400>; +def XVFRINTRZ_D : LASX2R_XX<0x769d6800>; +def XVFRINTRP_S : LASX2R_XX<0x769d5400>; +def XVFRINTRP_D : LASX2R_XX<0x769d5800>; +def XVFRINTRM_S : LASX2R_XX<0x769d4400>; +def XVFRINTRM_D : LASX2R_XX<0x769d4800>; +def XVFRINT_S : LASX2R_XX<0x769d3400>; +def XVFRINT_D : LASX2R_XX<0x769d3800>; + +def XVFTINTRNE_W_S : LASX2R_XX<0x769e5000>; +def XVFTINTRNE_L_D : LASX2R_XX<0x769e5400>; +def XVFTINTRZ_W_S : LASX2R_XX<0x769e4800>; +def XVFTINTRZ_L_D : LASX2R_XX<0x769e4c00>; +def XVFTINTRP_W_S : LASX2R_XX<0x769e4000>; +def XVFTINTRP_L_D : LASX2R_XX<0x769e4400>; +def XVFTINTRM_W_S : LASX2R_XX<0x769e3800>; +def XVFTINTRM_L_D : LASX2R_XX<0x769e3c00>; +def XVFTINT_W_S : LASX2R_XX<0x769e3000>; +def XVFTINT_L_D : LASX2R_XX<0x769e3400>; +def XVFTINTRZ_WU_S : LASX2R_XX<0x769e7000>; +def XVFTINTRZ_LU_D : LASX2R_XX<0x769e7400>; +def XVFTINT_WU_S : LASX2R_XX<0x769e5800>; +def XVFTINT_LU_D : LASX2R_XX<0x769e5c00>; + +def XVFTINTRNE_W_D : LASX3R_XXX<0x754b8000>; +def XVFTINTRZ_W_D : LASX3R_XXX<0x754b0000>; +def XVFTINTRP_W_D : LASX3R_XXX<0x754a8000>; +def XVFTINTRM_W_D : LASX3R_XXX<0x754a0000>; +def XVFTINT_W_D : LASX3R_XXX<0x75498000>; + +def XVFTINTRNEL_L_S : LASX2R_XX<0x769ea000>; +def XVFTINTRNEH_L_S : LASX2R_XX<0x769ea400>; +def XVFTINTRZL_L_S : LASX2R_XX<0x769e9800>; +def XVFTINTRZH_L_S : LASX2R_XX<0x769e9c00>; +def XVFTINTRPL_L_S : LASX2R_XX<0x769e9000>; +def XVFTINTRPH_L_S : LASX2R_XX<0x769e9400>; +def XVFTINTRML_L_S : LASX2R_XX<0x769e8800>; +def XVFTINTRMH_L_S : LASX2R_XX<0x769e8c00>; +def XVFTINTL_L_S : LASX2R_XX<0x769e8000>; +def XVFTINTH_L_S : LASX2R_XX<0x769e8400>; + +def XVFFINT_S_W : LASX2R_XX<0x769e0000>; +def XVFFINT_D_L : LASX2R_XX<0x769e0800>; +def XVFFINT_S_WU : LASX2R_XX<0x769e0400>; +def XVFFINT_D_LU : LASX2R_XX<0x769e0c00>; +def XVFFINTL_D_W : LASX2R_XX<0x769e1000>; +def XVFFINTH_D_W : LASX2R_XX<0x769e1400>; +def XVFFINT_S_L : LASX3R_XXX<0x75480000>; + +def XVSEQ_B : LASX3R_XXX<0x74000000>; +def XVSEQ_H : 
LASX3R_XXX<0x74008000>; +def XVSEQ_W : LASX3R_XXX<0x74010000>; +def XVSEQ_D : LASX3R_XXX<0x74018000>; +def XVSEQI_B : LASX2RI5_XXI<0x76800000, simm5>; +def XVSEQI_H : LASX2RI5_XXI<0x76808000, simm5>; +def XVSEQI_W : LASX2RI5_XXI<0x76810000, simm5>; +def XVSEQI_D : LASX2RI5_XXI<0x76818000, simm5>; + +def XVSLE_B : LASX3R_XXX<0x74020000>; +def XVSLE_H : LASX3R_XXX<0x74028000>; +def XVSLE_W : LASX3R_XXX<0x74030000>; +def XVSLE_D : LASX3R_XXX<0x74038000>; +def XVSLEI_B : LASX2RI5_XXI<0x76820000, simm5>; +def XVSLEI_H : LASX2RI5_XXI<0x76828000, simm5>; +def XVSLEI_W : LASX2RI5_XXI<0x76830000, simm5>; +def XVSLEI_D : LASX2RI5_XXI<0x76838000, simm5>; + +def XVSLE_BU : LASX3R_XXX<0x74040000>; +def XVSLE_HU : LASX3R_XXX<0x74048000>; +def XVSLE_WU : LASX3R_XXX<0x74050000>; +def XVSLE_DU : LASX3R_XXX<0x74058000>; +def XVSLEI_BU : LASX2RI5_XXI<0x76840000>; +def XVSLEI_HU : LASX2RI5_XXI<0x76848000>; +def XVSLEI_WU : LASX2RI5_XXI<0x76850000>; +def XVSLEI_DU : LASX2RI5_XXI<0x76858000>; + +def XVSLT_B : LASX3R_XXX<0x74060000>; +def XVSLT_H : LASX3R_XXX<0x74068000>; +def XVSLT_W : LASX3R_XXX<0x74070000>; +def XVSLT_D : LASX3R_XXX<0x74078000>; +def XVSLTI_B : LASX2RI5_XXI<0x76860000, simm5>; +def XVSLTI_H : LASX2RI5_XXI<0x76868000, simm5>; +def XVSLTI_W : LASX2RI5_XXI<0x76870000, simm5>; +def XVSLTI_D : LASX2RI5_XXI<0x76878000, simm5>; + +def XVSLT_BU : LASX3R_XXX<0x74080000>; +def XVSLT_HU : LASX3R_XXX<0x74088000>; +def XVSLT_WU : LASX3R_XXX<0x74090000>; +def XVSLT_DU : LASX3R_XXX<0x74098000>; +def XVSLTI_BU : LASX2RI5_XXI<0x76880000>; +def XVSLTI_HU : LASX2RI5_XXI<0x76888000>; +def XVSLTI_WU : LASX2RI5_XXI<0x76890000>; +def XVSLTI_DU : LASX2RI5_XXI<0x76898000>; + +def XVFCMP_CAF_S : LASX3R_XXX<0x0c900000>; +def XVFCMP_SAF_S : LASX3R_XXX<0x0c908000>; +def XVFCMP_CLT_S : LASX3R_XXX<0x0c910000>; +def XVFCMP_SLT_S : LASX3R_XXX<0x0c918000>; +def XVFCMP_CEQ_S : LASX3R_XXX<0x0c920000>; +def XVFCMP_SEQ_S : LASX3R_XXX<0x0c928000>; +def XVFCMP_CLE_S : LASX3R_XXX<0x0c930000>; +def XVFCMP_SLE_S : LASX3R_XXX<0x0c938000>; +def XVFCMP_CUN_S : LASX3R_XXX<0x0c940000>; +def XVFCMP_SUN_S : LASX3R_XXX<0x0c948000>; +def XVFCMP_CULT_S : LASX3R_XXX<0x0c950000>; +def XVFCMP_SULT_S : LASX3R_XXX<0x0c958000>; +def XVFCMP_CUEQ_S : LASX3R_XXX<0x0c960000>; +def XVFCMP_SUEQ_S : LASX3R_XXX<0x0c968000>; +def XVFCMP_CULE_S : LASX3R_XXX<0x0c970000>; +def XVFCMP_SULE_S : LASX3R_XXX<0x0c978000>; +def XVFCMP_CNE_S : LASX3R_XXX<0x0c980000>; +def XVFCMP_SNE_S : LASX3R_XXX<0x0c988000>; +def XVFCMP_COR_S : LASX3R_XXX<0x0c9a0000>; +def XVFCMP_SOR_S : LASX3R_XXX<0x0c9a8000>; +def XVFCMP_CUNE_S : LASX3R_XXX<0x0c9c0000>; +def XVFCMP_SUNE_S : LASX3R_XXX<0x0c9c8000>; + +def XVFCMP_CAF_D : LASX3R_XXX<0x0ca00000>; +def XVFCMP_SAF_D : LASX3R_XXX<0x0ca08000>; +def XVFCMP_CLT_D : LASX3R_XXX<0x0ca10000>; +def XVFCMP_SLT_D : LASX3R_XXX<0x0ca18000>; +def XVFCMP_CEQ_D : LASX3R_XXX<0x0ca20000>; +def XVFCMP_SEQ_D : LASX3R_XXX<0x0ca28000>; +def XVFCMP_CLE_D : LASX3R_XXX<0x0ca30000>; +def XVFCMP_SLE_D : LASX3R_XXX<0x0ca38000>; +def XVFCMP_CUN_D : LASX3R_XXX<0x0ca40000>; +def XVFCMP_SUN_D : LASX3R_XXX<0x0ca48000>; +def XVFCMP_CULT_D : LASX3R_XXX<0x0ca50000>; +def XVFCMP_SULT_D : LASX3R_XXX<0x0ca58000>; +def XVFCMP_CUEQ_D : LASX3R_XXX<0x0ca60000>; +def XVFCMP_SUEQ_D : LASX3R_XXX<0x0ca68000>; +def XVFCMP_CULE_D : LASX3R_XXX<0x0ca70000>; +def XVFCMP_SULE_D : LASX3R_XXX<0x0ca78000>; +def XVFCMP_CNE_D : LASX3R_XXX<0x0ca80000>; +def XVFCMP_SNE_D : LASX3R_XXX<0x0ca88000>; +def XVFCMP_COR_D : LASX3R_XXX<0x0caa0000>; +def XVFCMP_SOR_D : LASX3R_XXX<0x0caa8000>; +def 
XVFCMP_CUNE_D : LASX3R_XXX<0x0cac0000>; +def XVFCMP_SUNE_D : LASX3R_XXX<0x0cac8000>; + +def XVBITSEL_V : LASX4R_XXXX<0x0d200000>; + +def XVBITSELI_B : LASX2RI8_XXXI<0x77c40000>; + +def XVSETEQZ_V : LASX2R_CX<0x769c9800>; +def XVSETNEZ_V : LASX2R_CX<0x769c9c00>; +def XVSETANYEQZ_B : LASX2R_CX<0x769ca000>; +def XVSETANYEQZ_H : LASX2R_CX<0x769ca400>; +def XVSETANYEQZ_W : LASX2R_CX<0x769ca800>; +def XVSETANYEQZ_D : LASX2R_CX<0x769cac00>; +def XVSETALLNEZ_B : LASX2R_CX<0x769cb000>; +def XVSETALLNEZ_H : LASX2R_CX<0x769cb400>; +def XVSETALLNEZ_W : LASX2R_CX<0x769cb800>; +def XVSETALLNEZ_D : LASX2R_CX<0x769cbc00>; + +def XVINSGR2VR_W : LASX2RI3_XXRI<0x76ebc000>; +def XVINSGR2VR_D : LASX2RI2_XXRI<0x76ebe000>; +def XVPICKVE2GR_W : LASX2RI3_RXI<0x76efc000>; +def XVPICKVE2GR_D : LASX2RI2_RXI<0x76efe000>; +def XVPICKVE2GR_WU : LASX2RI3_RXI<0x76f3c000>; +def XVPICKVE2GR_DU : LASX2RI2_RXI<0x76f3e000>; + +def XVREPLGR2VR_B : LASX2R_XR<0x769f0000>; +def XVREPLGR2VR_H : LASX2R_XR<0x769f0400>; +def XVREPLGR2VR_W : LASX2R_XR<0x769f0800>; +def XVREPLGR2VR_D : LASX2R_XR<0x769f0c00>; + +def XVREPLVE_B : LASX3R_XXR<0x75220000>; +def XVREPLVE_H : LASX3R_XXR<0x75228000>; +def XVREPLVE_W : LASX3R_XXR<0x75230000>; +def XVREPLVE_D : LASX3R_XXR<0x75238000>; +def XVREPL128VEI_B : LASX2RI4_XXI<0x76f78000>; +def XVREPL128VEI_H : LASX2RI3_XXI<0x76f7c000>; +def XVREPL128VEI_W : LASX2RI2_XXI<0x76f7e000>; +def XVREPL128VEI_D : LASX2RI1_XXI<0x76f7f000>; + +def XVREPLVE0_B : LASX2R_XX<0x77070000>; +def XVREPLVE0_H : LASX2R_XX<0x77078000>; +def XVREPLVE0_W : LASX2R_XX<0x7707c000>; +def XVREPLVE0_D : LASX2R_XX<0x7707e000>; +def XVREPLVE0_Q : LASX2R_XX<0x7707f000>; + +def XVINSVE0_W : LASX2RI3_XXXI<0x76ffc000>; +def XVINSVE0_D : LASX2RI2_XXXI<0x76ffe000>; + +def XVPICKVE_W : LASX2RI3_XXI<0x7703c000>; +def XVPICKVE_D : LASX2RI2_XXI<0x7703e000>; + +def XVBSLL_V : LASX2RI5_XXI<0x768e0000>; +def XVBSRL_V : LASX2RI5_XXI<0x768e8000>; + +def XVPACKEV_B : LASX3R_XXX<0x75160000>; +def XVPACKEV_H : LASX3R_XXX<0x75168000>; +def XVPACKEV_W : LASX3R_XXX<0x75170000>; +def XVPACKEV_D : LASX3R_XXX<0x75178000>; +def XVPACKOD_B : LASX3R_XXX<0x75180000>; +def XVPACKOD_H : LASX3R_XXX<0x75188000>; +def XVPACKOD_W : LASX3R_XXX<0x75190000>; +def XVPACKOD_D : LASX3R_XXX<0x75198000>; + +def XVPICKEV_B : LASX3R_XXX<0x751e0000>; +def XVPICKEV_H : LASX3R_XXX<0x751e8000>; +def XVPICKEV_W : LASX3R_XXX<0x751f0000>; +def XVPICKEV_D : LASX3R_XXX<0x751f8000>; +def XVPICKOD_B : LASX3R_XXX<0x75200000>; +def XVPICKOD_H : LASX3R_XXX<0x75208000>; +def XVPICKOD_W : LASX3R_XXX<0x75210000>; +def XVPICKOD_D : LASX3R_XXX<0x75218000>; + +def XVILVL_B : LASX3R_XXX<0x751a0000>; +def XVILVL_H : LASX3R_XXX<0x751a8000>; +def XVILVL_W : LASX3R_XXX<0x751b0000>; +def XVILVL_D : LASX3R_XXX<0x751b8000>; +def XVILVH_B : LASX3R_XXX<0x751c0000>; +def XVILVH_H : LASX3R_XXX<0x751c8000>; +def XVILVH_W : LASX3R_XXX<0x751d0000>; +def XVILVH_D : LASX3R_XXX<0x751d8000>; + +def XVSHUF_B : LASX4R_XXXX<0x0d600000>; + +def XVSHUF_H : LASX3R_XXXX<0x757a8000>; +def XVSHUF_W : LASX3R_XXXX<0x757b0000>; +def XVSHUF_D : LASX3R_XXXX<0x757b8000>; + +def XVPERM_W : LASX3R_XXX<0x757d0000>; + +def XVSHUF4I_B : LASX2RI8_XXI<0x77900000>; +def XVSHUF4I_H : LASX2RI8_XXI<0x77940000>; +def XVSHUF4I_W : LASX2RI8_XXI<0x77980000>; +def XVSHUF4I_D : LASX2RI8_XXXI<0x779c0000>; + +def XVPERMI_W : LASX2RI8_XXXI<0x77e40000>; +def XVPERMI_D : LASX2RI8_XXI<0x77e80000>; +def XVPERMI_Q : LASX2RI8_XXXI<0x77ec0000>; + +def XVEXTRINS_D : LASX2RI8_XXXI<0x77800000>; +def XVEXTRINS_W : LASX2RI8_XXXI<0x77840000>; +def XVEXTRINS_H : 
LASX2RI8_XXXI<0x77880000>;
+def XVEXTRINS_B : LASX2RI8_XXXI<0x778c0000>;
+} // mayLoad = 0, mayStore = 0
+
+let mayLoad = 1, mayStore = 0 in {
+def XVLD : LASX2RI12_Load<0x2c800000>;
+def XVLDX : LASX3R_Load<0x38480000>;
+
+def XVLDREPL_B : LASX2RI12_Load<0x32800000>;
+def XVLDREPL_H : LASX2RI11_Load<0x32400000>;
+def XVLDREPL_W : LASX2RI10_Load<0x32200000>;
+def XVLDREPL_D : LASX2RI9_Load<0x32100000>;
+} // mayLoad = 1, mayStore = 0
+
+let mayLoad = 0, mayStore = 1 in {
+def XVST : LASX2RI12_Store<0x2cc00000>;
+def XVSTX : LASX3R_Store<0x384c0000>;
+
+def XVSTELM_B : LASX2RI8I5_XRII<0x33800000>;
+def XVSTELM_H : LASX2RI8I4_XRII<0x33400000, simm8_lsl1>;
+def XVSTELM_W : LASX2RI8I3_XRII<0x33200000, simm8_lsl2>;
+def XVSTELM_D : LASX2RI8I2_XRII<0x33100000, simm8_lsl3>;
+} // mayLoad = 0, mayStore = 1
+
+} // hasSideEffects = 0, Predicates = [HasExtLASX]
+
+/// Pseudo-instructions
+
+let Predicates = [HasExtLASX] in {
+
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0, isCodeGenOnly = 0,
+    isAsmParserOnly = 1 in {
+def PseudoXVREPLI_B : Pseudo<(outs LASX256:$xd), (ins simm10:$imm), [],
+                             "xvrepli.b", "$xd, $imm">;
+def PseudoXVREPLI_H : Pseudo<(outs LASX256:$xd), (ins simm10:$imm), [],
+                             "xvrepli.h", "$xd, $imm">;
+def PseudoXVREPLI_W : Pseudo<(outs LASX256:$xd), (ins simm10:$imm), [],
+                             "xvrepli.w", "$xd, $imm">;
+def PseudoXVREPLI_D : Pseudo<(outs LASX256:$xd), (ins simm10:$imm), [],
+                             "xvrepli.d", "$xd, $imm">;
+}
+
+def PseudoXVBNZ_B : VecCond<loongarch_vall_nonzero, v32i8, LASX256>;
+def PseudoXVBNZ_H : VecCond<loongarch_vall_nonzero, v16i16, LASX256>;
+def PseudoXVBNZ_W : VecCond<loongarch_vall_nonzero, v8i32, LASX256>;
+def PseudoXVBNZ_D : VecCond<loongarch_vall_nonzero, v4i64, LASX256>;
+def PseudoXVBNZ : VecCond<loongarch_vany_nonzero, v32i8, LASX256>;
+
+def PseudoXVBZ_B : VecCond<loongarch_vall_zero, v32i8, LASX256>;
+def PseudoXVBZ_H : VecCond<loongarch_vall_zero, v16i16, LASX256>;
+def PseudoXVBZ_W : VecCond<loongarch_vall_zero, v8i32, LASX256>;
+def PseudoXVBZ_D : VecCond<loongarch_vall_zero, v4i64, LASX256>;
+def PseudoXVBZ : VecCond<loongarch_vany_zero, v32i8, LASX256>;
+
+let usesCustomInserter = 1, Constraints = "$xd = $dst" in {
+def PseudoXVINSGR2VR_B
+  : Pseudo<(outs LASX256:$dst), (ins LASX256:$xd, GPR:$rj, uimm5:$imm)>;
+def PseudoXVINSGR2VR_H
+  : Pseudo<(outs LASX256:$dst), (ins LASX256:$xd, GPR:$rj, uimm4:$imm)>;
+} // usesCustomInserter = 1, Constraints = "$xd = $dst"
+
+} // Predicates = [HasExtLASX]
+
+multiclass PatXr<SDPatternOperator OpNode, string Inst> {
+  def : Pat<(v32i8 (OpNode (v32i8 LASX256:$xj))),
+            (!cast<LAInst>(Inst#"_B") LASX256:$xj)>;
+  def : Pat<(v16i16 (OpNode (v16i16 LASX256:$xj))),
+            (!cast<LAInst>(Inst#"_H") LASX256:$xj)>;
+  def : Pat<(v8i32 (OpNode (v8i32 LASX256:$xj))),
+            (!cast<LAInst>(Inst#"_W") LASX256:$xj)>;
+  def : Pat<(v4i64 (OpNode (v4i64 LASX256:$xj))),
+            (!cast<LAInst>(Inst#"_D") LASX256:$xj)>;
+}
+
+multiclass PatXrF<SDPatternOperator OpNode, string Inst> {
+  def : Pat<(v8f32 (OpNode (v8f32 LASX256:$xj))),
+            (!cast<LAInst>(Inst#"_S") LASX256:$xj)>;
+  def : Pat<(v4f64 (OpNode (v4f64 LASX256:$xj))),
+            (!cast<LAInst>(Inst#"_D") LASX256:$xj)>;
+}
+
+multiclass PatXrXr<SDPatternOperator OpNode, string Inst> {
+  def : Pat<(OpNode (v32i8 LASX256:$xj), (v32i8 LASX256:$xk)),
+            (!cast<LAInst>(Inst#"_B") LASX256:$xj, LASX256:$xk)>;
+  def : Pat<(OpNode (v16i16 LASX256:$xj), (v16i16 LASX256:$xk)),
+            (!cast<LAInst>(Inst#"_H") LASX256:$xj, LASX256:$xk)>;
+  def : Pat<(OpNode (v8i32 LASX256:$xj), (v8i32 LASX256:$xk)),
+            (!cast<LAInst>(Inst#"_W") LASX256:$xj, LASX256:$xk)>;
+  def : Pat<(OpNode (v4i64 LASX256:$xj), (v4i64 LASX256:$xk)),
+            (!cast<LAInst>(Inst#"_D") LASX256:$xj, LASX256:$xk)>;
+}
+
+multiclass PatXrXrF<SDPatternOperator OpNode, string Inst> {
+  def : Pat<(OpNode (v8f32 LASX256:$xj), (v8f32 LASX256:$xk)),
+            (!cast<LAInst>(Inst#"_S") LASX256:$xj, LASX256:$xk)>;
+  def : Pat<(OpNode (v4f64 LASX256:$xj), (v4f64 LASX256:$xk)),
+            (!cast<LAInst>(Inst#"_D") LASX256:$xj, LASX256:$xk)>;
+}
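// A sketch for illustration (not part of the patch): each Pat* multiclass
// above stamps out one pattern per element type from a single
// (OpNode, Inst) pair, so a later `defm : PatXrXr<add, "XVADD">;` is
// shorthand for four hand-written records, the first being:
//   def : Pat<(add (v32i8 LASX256:$xj), (v32i8 LASX256:$xk)),
//             (XVADD_B LASX256:$xj, LASX256:$xk)>;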
+multiclass PatXrXrU<SDPatternOperator OpNode, string Inst> {
+  def : Pat<(OpNode (v32i8 LASX256:$xj), (v32i8 LASX256:$xk)),
+            (!cast<LAInst>(Inst#"_BU") LASX256:$xj, LASX256:$xk)>;
+  def : Pat<(OpNode (v16i16 LASX256:$xj), (v16i16 LASX256:$xk)),
+            (!cast<LAInst>(Inst#"_HU") LASX256:$xj, LASX256:$xk)>;
+  def : Pat<(OpNode (v8i32 LASX256:$xj), (v8i32 LASX256:$xk)),
+            (!cast<LAInst>(Inst#"_WU") LASX256:$xj, LASX256:$xk)>;
+  def : Pat<(OpNode (v4i64 LASX256:$xj), (v4i64 LASX256:$xk)),
+            (!cast<LAInst>(Inst#"_DU") LASX256:$xj, LASX256:$xk)>;
+}
+
+multiclass PatXrSimm5<SDPatternOperator OpNode, string Inst> {
+  def : Pat<(OpNode (v32i8 LASX256:$xj), (v32i8 (SplatPat_simm5 simm5:$imm))),
+            (!cast<LAInst>(Inst#"_B") LASX256:$xj, simm5:$imm)>;
+  def : Pat<(OpNode (v16i16 LASX256:$xj), (v16i16 (SplatPat_simm5 simm5:$imm))),
+            (!cast<LAInst>(Inst#"_H") LASX256:$xj, simm5:$imm)>;
+  def : Pat<(OpNode (v8i32 LASX256:$xj), (v8i32 (SplatPat_simm5 simm5:$imm))),
+            (!cast<LAInst>(Inst#"_W") LASX256:$xj, simm5:$imm)>;
+  def : Pat<(OpNode (v4i64 LASX256:$xj), (v4i64 (SplatPat_simm5 simm5:$imm))),
+            (!cast<LAInst>(Inst#"_D") LASX256:$xj, simm5:$imm)>;
+}
+
+multiclass PatXrUimm5<SDPatternOperator OpNode, string Inst> {
+  def : Pat<(OpNode (v32i8 LASX256:$xj), (v32i8 (SplatPat_uimm5 uimm5:$imm))),
+            (!cast<LAInst>(Inst#"_BU") LASX256:$xj, uimm5:$imm)>;
+  def : Pat<(OpNode (v16i16 LASX256:$xj), (v16i16 (SplatPat_uimm5 uimm5:$imm))),
+            (!cast<LAInst>(Inst#"_HU") LASX256:$xj, uimm5:$imm)>;
+  def : Pat<(OpNode (v8i32 LASX256:$xj), (v8i32 (SplatPat_uimm5 uimm5:$imm))),
+            (!cast<LAInst>(Inst#"_WU") LASX256:$xj, uimm5:$imm)>;
+  def : Pat<(OpNode (v4i64 LASX256:$xj), (v4i64 (SplatPat_uimm5 uimm5:$imm))),
+            (!cast<LAInst>(Inst#"_DU") LASX256:$xj, uimm5:$imm)>;
+}
+
+multiclass PatXrXrXr<SDPatternOperator OpNode, string Inst> {
+  def : Pat<(OpNode (v32i8 LASX256:$xd), (v32i8 LASX256:$xj),
+                    (v32i8 LASX256:$xk)),
+            (!cast<LAInst>(Inst#"_B") LASX256:$xd, LASX256:$xj, LASX256:$xk)>;
+  def : Pat<(OpNode (v16i16 LASX256:$xd), (v16i16 LASX256:$xj),
+                    (v16i16 LASX256:$xk)),
+            (!cast<LAInst>(Inst#"_H") LASX256:$xd, LASX256:$xj, LASX256:$xk)>;
+  def : Pat<(OpNode (v8i32 LASX256:$xd), (v8i32 LASX256:$xj),
+                    (v8i32 LASX256:$xk)),
+            (!cast<LAInst>(Inst#"_W") LASX256:$xd, LASX256:$xj, LASX256:$xk)>;
+  def : Pat<(OpNode (v4i64 LASX256:$xd), (v4i64 LASX256:$xj),
+                    (v4i64 LASX256:$xk)),
+            (!cast<LAInst>(Inst#"_D") LASX256:$xd, LASX256:$xj, LASX256:$xk)>;
+}
+
+multiclass PatShiftXrXr<SDPatternOperator OpNode, string Inst> {
+  def : Pat<(OpNode (v32i8 LASX256:$xj), (and vsplati8_imm_eq_7,
+                                              (v32i8 LASX256:$xk))),
+            (!cast<LAInst>(Inst#"_B") LASX256:$xj, LASX256:$xk)>;
+  def : Pat<(OpNode (v16i16 LASX256:$xj), (and vsplati16_imm_eq_15,
+                                               (v16i16 LASX256:$xk))),
+            (!cast<LAInst>(Inst#"_H") LASX256:$xj, LASX256:$xk)>;
+  def : Pat<(OpNode (v8i32 LASX256:$xj), (and vsplati32_imm_eq_31,
+                                              (v8i32 LASX256:$xk))),
+            (!cast<LAInst>(Inst#"_W") LASX256:$xj, LASX256:$xk)>;
+  def : Pat<(OpNode (v4i64 LASX256:$xj), (and vsplati64_imm_eq_63,
+                                              (v4i64 LASX256:$xk))),
+            (!cast<LAInst>(Inst#"_D") LASX256:$xj, LASX256:$xk)>;
+}
+
+multiclass PatShiftXrUimm<SDPatternOperator OpNode, string Inst> {
+  def : Pat<(OpNode (v32i8 LASX256:$xj), (v32i8 (SplatPat_uimm3 uimm3:$imm))),
+            (!cast<LAInst>(Inst#"_B") LASX256:$xj, uimm3:$imm)>;
+  def : Pat<(OpNode (v16i16 LASX256:$xj), (v16i16 (SplatPat_uimm4 uimm4:$imm))),
+            (!cast<LAInst>(Inst#"_H") LASX256:$xj, uimm4:$imm)>;
+  def : Pat<(OpNode (v8i32 LASX256:$xj), (v8i32 (SplatPat_uimm5 uimm5:$imm))),
+            (!cast<LAInst>(Inst#"_W") LASX256:$xj, uimm5:$imm)>;
+  def : Pat<(OpNode (v4i64 LASX256:$xj), (v4i64 (SplatPat_uimm6 uimm6:$imm))),
+            (!cast<LAInst>(Inst#"_D") LASX256:$xj, uimm6:$imm)>;
+}
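// A sketch for illustration (not part of the patch): PatShiftXrXr matches
// shifts whose per-element amount has already been masked, e.g. for bytes
//   (shl $xj, (and splat(7), $xk))  -->  xvsll.b $xd, $xj, $xk
// on the assumption that the vector shift instructions only consume the low
// log2(EltBits) bits of each shift-amount element, making the `and` redundant.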
+multiclass PatCCXrSimm5<CondCode CC, string Inst> {
+  def : Pat<(v32i8 (setcc (v32i8 LASX256:$xj),
+                          (v32i8 (SplatPat_simm5 simm5:$imm)), CC)),
+            (!cast<LAInst>(Inst#"_B") LASX256:$xj, simm5:$imm)>;
+  def : Pat<(v16i16 (setcc (v16i16 LASX256:$xj),
+                           (v16i16 (SplatPat_simm5 simm5:$imm)), CC)),
+            (!cast<LAInst>(Inst#"_H") LASX256:$xj, simm5:$imm)>;
+  def : Pat<(v8i32 (setcc (v8i32 LASX256:$xj),
+                          (v8i32 (SplatPat_simm5 simm5:$imm)), CC)),
+            (!cast<LAInst>(Inst#"_W") LASX256:$xj, simm5:$imm)>;
+  def : Pat<(v4i64 (setcc (v4i64 LASX256:$xj),
+                          (v4i64 (SplatPat_simm5 simm5:$imm)), CC)),
+            (!cast<LAInst>(Inst#"_D") LASX256:$xj, simm5:$imm)>;
+}
+
+multiclass PatCCXrUimm5<CondCode CC, string Inst> {
+  def : Pat<(v32i8 (setcc (v32i8 LASX256:$xj),
+                          (v32i8 (SplatPat_uimm5 uimm5:$imm)), CC)),
+            (!cast<LAInst>(Inst#"_BU") LASX256:$xj, uimm5:$imm)>;
+  def : Pat<(v16i16 (setcc (v16i16 LASX256:$xj),
+                           (v16i16 (SplatPat_uimm5 uimm5:$imm)), CC)),
+            (!cast<LAInst>(Inst#"_HU") LASX256:$xj, uimm5:$imm)>;
+  def : Pat<(v8i32 (setcc (v8i32 LASX256:$xj),
+                          (v8i32 (SplatPat_uimm5 uimm5:$imm)), CC)),
+            (!cast<LAInst>(Inst#"_WU") LASX256:$xj, uimm5:$imm)>;
+  def : Pat<(v4i64 (setcc (v4i64 LASX256:$xj),
+                          (v4i64 (SplatPat_uimm5 uimm5:$imm)), CC)),
+            (!cast<LAInst>(Inst#"_DU") LASX256:$xj, uimm5:$imm)>;
+}
+
+multiclass PatCCXrXr<CondCode CC, string Inst> {
+  def : Pat<(v32i8 (setcc (v32i8 LASX256:$xj), (v32i8 LASX256:$xk), CC)),
+            (!cast<LAInst>(Inst#"_B") LASX256:$xj, LASX256:$xk)>;
+  def : Pat<(v16i16 (setcc (v16i16 LASX256:$xj), (v16i16 LASX256:$xk), CC)),
+            (!cast<LAInst>(Inst#"_H") LASX256:$xj, LASX256:$xk)>;
+  def : Pat<(v8i32 (setcc (v8i32 LASX256:$xj), (v8i32 LASX256:$xk), CC)),
+            (!cast<LAInst>(Inst#"_W") LASX256:$xj, LASX256:$xk)>;
+  def : Pat<(v4i64 (setcc (v4i64 LASX256:$xj), (v4i64 LASX256:$xk), CC)),
+            (!cast<LAInst>(Inst#"_D") LASX256:$xj, LASX256:$xk)>;
+}
+
+multiclass PatCCXrXrU<CondCode CC, string Inst> {
+  def : Pat<(v32i8 (setcc (v32i8 LASX256:$xj), (v32i8 LASX256:$xk), CC)),
+            (!cast<LAInst>(Inst#"_BU") LASX256:$xj, LASX256:$xk)>;
+  def : Pat<(v16i16 (setcc (v16i16 LASX256:$xj), (v16i16 LASX256:$xk), CC)),
+            (!cast<LAInst>(Inst#"_HU") LASX256:$xj, LASX256:$xk)>;
+  def : Pat<(v8i32 (setcc (v8i32 LASX256:$xj), (v8i32 LASX256:$xk), CC)),
+            (!cast<LAInst>(Inst#"_WU") LASX256:$xj, LASX256:$xk)>;
+  def : Pat<(v4i64 (setcc (v4i64 LASX256:$xj), (v4i64 LASX256:$xk), CC)),
+            (!cast<LAInst>(Inst#"_DU") LASX256:$xj, LASX256:$xk)>;
+}
+
+multiclass PatCCXrXrF<CondCode CC, string Inst> {
+  def : Pat<(v8i32 (setcc (v8f32 LASX256:$xj), (v8f32 LASX256:$xk), CC)),
+            (!cast<LAInst>(Inst#"_S") LASX256:$xj, LASX256:$xk)>;
+  def : Pat<(v4i64 (setcc (v4f64 LASX256:$xj), (v4f64 LASX256:$xk), CC)),
+            (!cast<LAInst>(Inst#"_D") LASX256:$xj, LASX256:$xk)>;
+}
+
+let Predicates = [HasExtLASX] in {
+
+// XVADD_{B/H/W/D}
+defm : PatXrXr<add, "XVADD">;
+// XVSUB_{B/H/W/D}
+defm : PatXrXr<sub, "XVSUB">;
+
+// XVADDI_{B/H/W/D}U
+defm : PatXrUimm5<add, "XVADDI">;
+// XVSUBI_{B/H/W/D}U
+defm : PatXrUimm5<sub, "XVSUBI">;
+
+// XVNEG_{B/H/W/D}
+def : Pat<(sub immAllZerosV, (v32i8 LASX256:$xj)), (XVNEG_B LASX256:$xj)>;
+def : Pat<(sub immAllZerosV, (v16i16 LASX256:$xj)), (XVNEG_H LASX256:$xj)>;
+def : Pat<(sub immAllZerosV, (v8i32 LASX256:$xj)), (XVNEG_W LASX256:$xj)>;
+def : Pat<(sub immAllZerosV, (v4i64 LASX256:$xj)), (XVNEG_D LASX256:$xj)>;
+
+// XVMAX[I]_{B/H/W/D}[U]
+defm : PatXrXr<smax, "XVMAX">;
+defm : PatXrXrU<umax, "XVMAX">;
+defm : PatXrSimm5<smax, "XVMAXI">;
+defm : PatXrUimm5<umax, "XVMAXI">;
+
+// XVMIN[I]_{B/H/W/D}[U]
+defm : PatXrXr<smin, "XVMIN">;
+defm : PatXrXrU<umin, "XVMIN">;
+defm : PatXrSimm5<smin, "XVMINI">;
+defm : PatXrUimm5<umin, "XVMINI">;
+
+// XVMUL_{B/H/W/D}
+defm : PatXrXr<mul, "XVMUL">;
+
+// XVMUH_{B/H/W/D}[U]
+defm : PatXrXr<mulhs, "XVMUH">;
+defm : PatXrXrU<mulhu, "XVMUH">;
+
+// XVMADD_{B/H/W/D}
+defm : PatXrXrXr<muladd, "XVMADD">;
+// XVMSUB_{B/H/W/D}
+defm : PatXrXrXr<mulsub, "XVMSUB">;
+
+// XVDIV_{B/H/W/D}[U]
+defm : PatXrXr<sdiv, "XVDIV">;
+defm : PatXrXrU<udiv, "XVDIV">;
+
+// XVMOD_{B/H/W/D}[U]
+defm : PatXrXr<srem, "XVMOD">;
+defm : PatXrXrU<urem, "XVMOD">;
+
+// XVAND_V
+foreach vt = [v32i8, v16i16, v8i32, v4i64] in
+def : Pat<(and (vt LASX256:$xj), (vt LASX256:$xk)),
+          (XVAND_V LASX256:$xj, LASX256:$xk)>;
+// XVOR_V
+foreach vt = [v32i8, v16i16, v8i32, v4i64] in
+def : Pat<(or (vt LASX256:$xj), (vt LASX256:$xk)),
+          (XVOR_V LASX256:$xj, LASX256:$xk)>;
+// XVXOR_V
+foreach vt = [v32i8, v16i16, v8i32, v4i64] in
+def : Pat<(xor (vt LASX256:$xj), (vt LASX256:$xk)),
+          (XVXOR_V LASX256:$xj, LASX256:$xk)>;
+// XVNOR_V
+foreach vt = [v32i8, v16i16, v8i32, v4i64] in
+def : Pat<(vnot (or (vt LASX256:$xj), (vt LASX256:$xk))),
+          (XVNOR_V LASX256:$xj, LASX256:$xk)>;
+
+// XVANDI_B
+def : Pat<(and (v32i8 LASX256:$xj), (v32i8 (SplatPat_uimm8 uimm8:$imm))),
+          (XVANDI_B LASX256:$xj, uimm8:$imm)>;
+// XVORI_B
+def : Pat<(or (v32i8 LASX256:$xj), (v32i8 (SplatPat_uimm8 uimm8:$imm))),
+          (XVORI_B LASX256:$xj, uimm8:$imm)>;
+
+// XVXORI_B
+def : Pat<(xor (v32i8 LASX256:$xj), (v32i8 (SplatPat_uimm8 uimm8:$imm))),
+          (XVXORI_B LASX256:$xj, uimm8:$imm)>;
+
+// XVSLL[I]_{B/H/W/D}
+defm : PatXrXr<shl, "XVSLL">;
+defm : PatShiftXrXr<shl, "XVSLL">;
+defm : PatShiftXrUimm<shl, "XVSLLI">;
+
+// XVSRL[I]_{B/H/W/D}
+defm : PatXrXr<srl, "XVSRL">;
+defm : PatShiftXrXr<srl, "XVSRL">;
+defm : PatShiftXrUimm<srl, "XVSRLI">;
+
+// XVSRA[I]_{B/H/W/D}
+defm : PatXrXr<sra, "XVSRA">;
+defm : PatShiftXrXr<sra, "XVSRA">;
+defm : PatShiftXrUimm<sra, "XVSRAI">;
+
+// XVCLZ_{B/H/W/D}
+defm : PatXr<ctlz, "XVCLZ">;
+
+// XVPCNT_{B/H/W/D}
+defm : PatXr<ctpop, "XVPCNT">;
+
+// XVBITCLR_{B/H/W/D}
+def : Pat<(and v32i8:$xj, (vnot (shl vsplat_imm_eq_1, v32i8:$xk))),
+          (v32i8 (XVBITCLR_B v32i8:$xj, v32i8:$xk))>;
+def : Pat<(and v16i16:$xj, (vnot (shl vsplat_imm_eq_1, v16i16:$xk))),
+          (v16i16 (XVBITCLR_H v16i16:$xj, v16i16:$xk))>;
+def : Pat<(and v8i32:$xj, (vnot (shl vsplat_imm_eq_1, v8i32:$xk))),
+          (v8i32 (XVBITCLR_W v8i32:$xj, v8i32:$xk))>;
+def : Pat<(and v4i64:$xj, (vnot (shl vsplat_imm_eq_1, v4i64:$xk))),
+          (v4i64 (XVBITCLR_D v4i64:$xj, v4i64:$xk))>;
+def : Pat<(and v32i8:$xj, (vnot (shl vsplat_imm_eq_1,
+                                     (vsplati8imm7 v32i8:$xk)))),
+          (v32i8 (XVBITCLR_B v32i8:$xj, v32i8:$xk))>;
+def : Pat<(and v16i16:$xj, (vnot (shl vsplat_imm_eq_1,
+                                      (vsplati16imm15 v16i16:$xk)))),
+          (v16i16 (XVBITCLR_H v16i16:$xj, v16i16:$xk))>;
+def : Pat<(and v8i32:$xj, (vnot (shl vsplat_imm_eq_1,
+                                     (vsplati32imm31 v8i32:$xk)))),
+          (v8i32 (XVBITCLR_W v8i32:$xj, v8i32:$xk))>;
+def : Pat<(and v4i64:$xj, (vnot (shl vsplat_imm_eq_1,
+                                     (vsplati64imm63 v4i64:$xk)))),
+          (v4i64 (XVBITCLR_D v4i64:$xj, v4i64:$xk))>;
+
+// XVBITCLRI_{B/H/W/D}
+def : Pat<(and (v32i8 LASX256:$xj), (v32i8 (vsplat_uimm_inv_pow2 uimm3:$imm))),
+          (XVBITCLRI_B LASX256:$xj, uimm3:$imm)>;
+def : Pat<(and (v16i16 LASX256:$xj),
+               (v16i16 (vsplat_uimm_inv_pow2 uimm4:$imm))),
+          (XVBITCLRI_H LASX256:$xj, uimm4:$imm)>;
+def : Pat<(and (v8i32 LASX256:$xj), (v8i32 (vsplat_uimm_inv_pow2 uimm5:$imm))),
+          (XVBITCLRI_W LASX256:$xj, uimm5:$imm)>;
+def : Pat<(and (v4i64 LASX256:$xj), (v4i64 (vsplat_uimm_inv_pow2 uimm6:$imm))),
+          (XVBITCLRI_D LASX256:$xj, uimm6:$imm)>;
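// A sketch for illustration (not part of the patch): the XVBITCLRI patterns
// above encode the identity `x & ~(1 << n)` == clear bit n, so an AND with
// an inverted power-of-two splat folds to the immediate form, e.g.:
//   (and $xj, splat(0xf7))  -->  xvbitclri.b $xd, $xj, 3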
+// XVBITSET_{B/H/W/D}
+def : Pat<(or v32i8:$xj, (shl vsplat_imm_eq_1, v32i8:$xk)),
+          (v32i8 (XVBITSET_B v32i8:$xj, v32i8:$xk))>;
+def : Pat<(or v16i16:$xj, (shl vsplat_imm_eq_1, v16i16:$xk)),
+          (v16i16 (XVBITSET_H v16i16:$xj, v16i16:$xk))>;
+def : Pat<(or v8i32:$xj, (shl vsplat_imm_eq_1, v8i32:$xk)),
+          (v8i32 (XVBITSET_W v8i32:$xj, v8i32:$xk))>;
+def : Pat<(or v4i64:$xj, (shl vsplat_imm_eq_1, v4i64:$xk)),
+          (v4i64 (XVBITSET_D v4i64:$xj, v4i64:$xk))>;
+def : Pat<(or v32i8:$xj, (shl vsplat_imm_eq_1, (vsplati8imm7 v32i8:$xk))),
+          (v32i8 (XVBITSET_B v32i8:$xj, v32i8:$xk))>;
+def : Pat<(or v16i16:$xj, (shl vsplat_imm_eq_1, (vsplati16imm15 v16i16:$xk))),
+          (v16i16 (XVBITSET_H v16i16:$xj, v16i16:$xk))>;
+def : Pat<(or v8i32:$xj, (shl vsplat_imm_eq_1, (vsplati32imm31 v8i32:$xk))),
+          (v8i32 (XVBITSET_W v8i32:$xj, v8i32:$xk))>;
+def : Pat<(or v4i64:$xj, (shl vsplat_imm_eq_1, (vsplati64imm63 v4i64:$xk))),
+          (v4i64 (XVBITSET_D v4i64:$xj, v4i64:$xk))>;
+
+// XVBITSETI_{B/H/W/D}
+def : Pat<(or (v32i8 LASX256:$xj), (v32i8 (vsplat_uimm_pow2 uimm3:$imm))),
+          (XVBITSETI_B LASX256:$xj, uimm3:$imm)>;
+def : Pat<(or (v16i16 LASX256:$xj), (v16i16 (vsplat_uimm_pow2 uimm4:$imm))),
+          (XVBITSETI_H LASX256:$xj, uimm4:$imm)>;
+def : Pat<(or (v8i32 LASX256:$xj), (v8i32 (vsplat_uimm_pow2 uimm5:$imm))),
+          (XVBITSETI_W LASX256:$xj, uimm5:$imm)>;
+def : Pat<(or (v4i64 LASX256:$xj), (v4i64 (vsplat_uimm_pow2 uimm6:$imm))),
+          (XVBITSETI_D LASX256:$xj, uimm6:$imm)>;
+
+// XVBITREV_{B/H/W/D}
+def : Pat<(xor v32i8:$xj, (shl vsplat_imm_eq_1, v32i8:$xk)),
+          (v32i8 (XVBITREV_B v32i8:$xj, v32i8:$xk))>;
+def : Pat<(xor v16i16:$xj, (shl vsplat_imm_eq_1, v16i16:$xk)),
+          (v16i16 (XVBITREV_H v16i16:$xj, v16i16:$xk))>;
+def : Pat<(xor v8i32:$xj, (shl vsplat_imm_eq_1, v8i32:$xk)),
+          (v8i32 (XVBITREV_W v8i32:$xj, v8i32:$xk))>;
+def : Pat<(xor v4i64:$xj, (shl vsplat_imm_eq_1, v4i64:$xk)),
+          (v4i64 (XVBITREV_D v4i64:$xj, v4i64:$xk))>;
+def : Pat<(xor v32i8:$xj, (shl vsplat_imm_eq_1, (vsplati8imm7 v32i8:$xk))),
+          (v32i8 (XVBITREV_B v32i8:$xj, v32i8:$xk))>;
+def : Pat<(xor v16i16:$xj, (shl vsplat_imm_eq_1, (vsplati16imm15 v16i16:$xk))),
+          (v16i16 (XVBITREV_H v16i16:$xj, v16i16:$xk))>;
+def : Pat<(xor v8i32:$xj, (shl vsplat_imm_eq_1, (vsplati32imm31 v8i32:$xk))),
+          (v8i32 (XVBITREV_W v8i32:$xj, v8i32:$xk))>;
+def : Pat<(xor v4i64:$xj, (shl vsplat_imm_eq_1, (vsplati64imm63 v4i64:$xk))),
+          (v4i64 (XVBITREV_D v4i64:$xj, v4i64:$xk))>;
+
+// XVBITREVI_{B/H/W/D}
+def : Pat<(xor (v32i8 LASX256:$xj), (v32i8 (vsplat_uimm_pow2 uimm3:$imm))),
+          (XVBITREVI_B LASX256:$xj, uimm3:$imm)>;
+def : Pat<(xor (v16i16 LASX256:$xj), (v16i16 (vsplat_uimm_pow2 uimm4:$imm))),
+          (XVBITREVI_H LASX256:$xj, uimm4:$imm)>;
+def : Pat<(xor (v8i32 LASX256:$xj), (v8i32 (vsplat_uimm_pow2 uimm5:$imm))),
+          (XVBITREVI_W LASX256:$xj, uimm5:$imm)>;
+def : Pat<(xor (v4i64 LASX256:$xj), (v4i64 (vsplat_uimm_pow2 uimm6:$imm))),
+          (XVBITREVI_D LASX256:$xj, uimm6:$imm)>;
+
+// XVFADD_{S/D}
+defm : PatXrXrF<fadd, "XVFADD">;
+
+// XVFSUB_{S/D}
+defm : PatXrXrF<fsub, "XVFSUB">;
+
+// XVFMUL_{S/D}
+defm : PatXrXrF<fmul, "XVFMUL">;
+
+// XVFDIV_{S/D}
+defm : PatXrXrF<fdiv, "XVFDIV">;
+
+// XVFMADD_{S/D}
+def : Pat<(fma v8f32:$xj, v8f32:$xk, v8f32:$xa),
+          (XVFMADD_S v8f32:$xj, v8f32:$xk, v8f32:$xa)>;
+def : Pat<(fma v4f64:$xj, v4f64:$xk, v4f64:$xa),
+          (XVFMADD_D v4f64:$xj, v4f64:$xk, v4f64:$xa)>;
+
+// XVFMSUB_{S/D}
+def : Pat<(fma v8f32:$xj, v8f32:$xk, (fneg v8f32:$xa)),
+          (XVFMSUB_S v8f32:$xj, v8f32:$xk, v8f32:$xa)>;
+def : Pat<(fma v4f64:$xj, v4f64:$xk, (fneg v4f64:$xa)),
+          (XVFMSUB_D v4f64:$xj, v4f64:$xk, v4f64:$xa)>;
+
+// XVFNMADD_{S/D}
+def : Pat<(fneg (fma v8f32:$xj, v8f32:$xk, v8f32:$xa)),
+          (XVFNMADD_S v8f32:$xj, v8f32:$xk, v8f32:$xa)>;
+def : Pat<(fneg (fma v4f64:$xj, v4f64:$xk, v4f64:$xa)),
+          (XVFNMADD_D v4f64:$xj, v4f64:$xk, v4f64:$xa)>;
+def : Pat<(fma_nsz (fneg v8f32:$xj), v8f32:$xk, (fneg v8f32:$xa)),
+          (XVFNMADD_S v8f32:$xj, v8f32:$xk, v8f32:$xa)>;
+def : Pat<(fma_nsz (fneg v4f64:$xj), v4f64:$xk, (fneg v4f64:$xa)),
+          (XVFNMADD_D v4f64:$xj, v4f64:$xk, v4f64:$xa)>;
+
+// XVFNMSUB_{S/D}
+def : Pat<(fneg (fma v8f32:$xj, v8f32:$xk, (fneg v8f32:$xa))),
+          (XVFNMSUB_S v8f32:$xj, v8f32:$xk, v8f32:$xa)>;
+def : Pat<(fneg (fma v4f64:$xj, v4f64:$xk, (fneg v4f64:$xa))),
+          (XVFNMSUB_D v4f64:$xj, v4f64:$xk, v4f64:$xa)>;
+def : Pat<(fma_nsz (fneg v8f32:$xj), v8f32:$xk, v8f32:$xa),
+          (XVFNMSUB_S v8f32:$xj, v8f32:$xk, v8f32:$xa)>;
+def : Pat<(fma_nsz (fneg v4f64:$xj), v4f64:$xk, v4f64:$xa),
+          (XVFNMSUB_D v4f64:$xj, v4f64:$xk, v4f64:$xa)>;
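// A sketch for illustration (not part of the patch): xvfnmadd.{s/d} computes
// -(xj * xk + xa), which is exactly the `fneg (fma ...)` form above; the
// second pair of patterns also accepts (-xj) * xk - xa, but only through the
// fma_nsz fragment, since the two forms differ in the sign of a zero result.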
+// XVFSQRT_{S/D}
+defm : PatXrF<fsqrt, "XVFSQRT">;
+
+// XVFRECIP_{S/D}
+def : Pat<(fdiv vsplatf32_fpimm_eq_1, v8f32:$xj),
+          (XVFRECIP_S v8f32:$xj)>;
+def : Pat<(fdiv vsplatf64_fpimm_eq_1, v4f64:$xj),
+          (XVFRECIP_D v4f64:$xj)>;
+
+// XVFRSQRT_{S/D}
+def : Pat<(fdiv vsplatf32_fpimm_eq_1, (fsqrt v8f32:$xj)),
+          (XVFRSQRT_S v8f32:$xj)>;
+def : Pat<(fdiv vsplatf64_fpimm_eq_1, (fsqrt v4f64:$xj)),
+          (XVFRSQRT_D v4f64:$xj)>;
+
+// XVSEQ[I]_{B/H/W/D}
+defm : PatCCXrSimm5<SETEQ, "XVSEQI">;
+defm : PatCCXrXr<SETEQ, "XVSEQ">;
+
+// XVSLE[I]_{B/H/W/D}[U]
+defm : PatCCXrSimm5<SETLE, "XVSLEI">;
+defm : PatCCXrUimm5<SETULE, "XVSLEI">;
+defm : PatCCXrXr<SETLE, "XVSLE">;
+defm : PatCCXrXrU<SETULE, "XVSLE">;
+
+// XVSLT[I]_{B/H/W/D}[U]
+defm : PatCCXrSimm5<SETLT, "XVSLTI">;
+defm : PatCCXrUimm5<SETULT, "XVSLTI">;
+defm : PatCCXrXr<SETLT, "XVSLT">;
+defm : PatCCXrXrU<SETULT, "XVSLT">;
+
+// XVFCMP.cond.{S/D}
+defm : PatCCXrXrF<SETEQ, "XVFCMP_CEQ">;
+defm : PatCCXrXrF<SETOEQ, "XVFCMP_CEQ">;
+defm : PatCCXrXrF<SETUEQ, "XVFCMP_CUEQ">;
+
+defm : PatCCXrXrF<SETLE, "XVFCMP_CLE">;
+defm : PatCCXrXrF<SETOLE, "XVFCMP_CLE">;
+defm : PatCCXrXrF<SETULE, "XVFCMP_CULE">;
+
+defm : PatCCXrXrF<SETLT, "XVFCMP_CLT">;
+defm : PatCCXrXrF<SETOLT, "XVFCMP_CLT">;
+defm : PatCCXrXrF<SETULT, "XVFCMP_CULT">;
+
+defm : PatCCXrXrF<SETNE, "XVFCMP_CNE">;
+defm : PatCCXrXrF<SETONE, "XVFCMP_CNE">;
+defm : PatCCXrXrF<SETUNE, "XVFCMP_CUNE">;
+
+defm : PatCCXrXrF<SETO, "XVFCMP_COR">;
+defm : PatCCXrXrF<SETUO, "XVFCMP_CUN">;
+
+// PseudoXVINSGR2VR_{B/H}
+def : Pat<(vector_insert v32i8:$xd, GRLenVT:$rj, uimm5:$imm),
+          (PseudoXVINSGR2VR_B v32i8:$xd, GRLenVT:$rj, uimm5:$imm)>;
+def : Pat<(vector_insert v16i16:$xd, GRLenVT:$rj, uimm4:$imm),
+          (PseudoXVINSGR2VR_H v16i16:$xd, GRLenVT:$rj, uimm4:$imm)>;
+
+// XVINSGR2VR_{W/D}
+def : Pat<(vector_insert v8i32:$xd, GRLenVT:$rj, uimm3:$imm),
+          (XVINSGR2VR_W v8i32:$xd, GRLenVT:$rj, uimm3:$imm)>;
+def : Pat<(vector_insert v4i64:$xd, GRLenVT:$rj, uimm2:$imm),
+          (XVINSGR2VR_D v4i64:$xd, GRLenVT:$rj, uimm2:$imm)>;
+
+def : Pat<(vector_insert v8f32:$vd, FPR32:$fj, uimm3:$imm),
+          (XVINSGR2VR_W $vd, (COPY_TO_REGCLASS FPR32:$fj, GPR), uimm3:$imm)>;
+def : Pat<(vector_insert v4f64:$vd, FPR64:$fj, uimm2:$imm),
+          (XVINSGR2VR_D $vd, (COPY_TO_REGCLASS FPR64:$fj, GPR), uimm2:$imm)>;
+
+// XVPICKVE2GR_W[U]
+def : Pat<(loongarch_vpick_sext_elt v8i32:$xd, uimm3:$imm, i32),
+          (XVPICKVE2GR_W v8i32:$xd, uimm3:$imm)>;
+def : Pat<(loongarch_vpick_zext_elt v8i32:$xd, uimm3:$imm, i32),
+          (XVPICKVE2GR_WU v8i32:$xd, uimm3:$imm)>;
+
+// XVREPLGR2VR_{B/H/W/D}
+def : Pat<(lasxsplati8 GPR:$rj), (XVREPLGR2VR_B GPR:$rj)>;
+def : Pat<(lasxsplati16 GPR:$rj), (XVREPLGR2VR_H GPR:$rj)>;
+def : Pat<(lasxsplati32 GPR:$rj), (XVREPLGR2VR_W GPR:$rj)>;
+def : Pat<(lasxsplati64 GPR:$rj), (XVREPLGR2VR_D GPR:$rj)>;
+
+// XVREPLVE_{B/H/W/D}
+def : Pat<(loongarch_vreplve v32i8:$xj, GRLenVT:$rk),
+          (XVREPLVE_B v32i8:$xj, GRLenVT:$rk)>;
+def : Pat<(loongarch_vreplve v16i16:$xj, GRLenVT:$rk),
+          (XVREPLVE_H v16i16:$xj, GRLenVT:$rk)>;
+def : Pat<(loongarch_vreplve v8i32:$xj, GRLenVT:$rk),
+          (XVREPLVE_W v8i32:$xj, GRLenVT:$rk)>;
+def : Pat<(loongarch_vreplve v4i64:$xj, GRLenVT:$rk),
+          (XVREPLVE_D v4i64:$xj, GRLenVT:$rk)>;
+
+// XVSHUF_{B/H/W/D}
+def : Pat<(loongarch_vshuf v32i8:$xa, v32i8:$xj, v32i8:$xk),
+          (XVSHUF_B v32i8:$xj, v32i8:$xk, v32i8:$xa)>;
+def : Pat<(loongarch_vshuf v16i16:$xd, v16i16:$xj, v16i16:$xk),
+          (XVSHUF_H v16i16:$xd, v16i16:$xj, v16i16:$xk)>;
+def : Pat<(loongarch_vshuf v8i32:$xd, v8i32:$xj, v8i32:$xk),
+          (XVSHUF_W v8i32:$xd, v8i32:$xj, v8i32:$xk)>;
+def : Pat<(loongarch_vshuf v4i64:$xd, v4i64:$xj, v4i64:$xk),
+          (XVSHUF_D v4i64:$xd, v4i64:$xj, v4i64:$xk)>;
+def : Pat<(loongarch_vshuf v8i32:$xd, v8f32:$xj, v8f32:$xk),
+          (XVSHUF_W v8i32:$xd, v8f32:$xj, v8f32:$xk)>;
+def : Pat<(loongarch_vshuf v4i64:$xd, v4f64:$xj, v4f64:$xk),
+          (XVSHUF_D v4i64:$xd, v4f64:$xj, v4f64:$xk)>;
+
+// XVPICKEV_{B/H/W/D}
+def : Pat<(loongarch_vpickev v32i8:$xj, v32i8:$xk),
+          (XVPICKEV_B v32i8:$xj, v32i8:$xk)>;
+def : Pat<(loongarch_vpickev v16i16:$xj, v16i16:$xk),
+          (XVPICKEV_H v16i16:$xj, v16i16:$xk)>;
+def : Pat<(loongarch_vpickev v8i32:$xj, v8i32:$xk),
+          (XVPICKEV_W v8i32:$xj, v8i32:$xk)>;
+def : Pat<(loongarch_vpickev v4i64:$xj,
v4i64:$xk), + (XVPICKEV_D v4i64:$xj, v4i64:$xk)>; +def : Pat<(loongarch_vpickev v8f32:$xj, v8f32:$xk), + (XVPICKEV_W v8f32:$xj, v8f32:$xk)>; +def : Pat<(loongarch_vpickev v4f64:$xj, v4f64:$xk), + (XVPICKEV_D v4f64:$xj, v4f64:$xk)>; + +// XVPICKOD_{B/H/W/D} +def : Pat<(loongarch_vpickod v32i8:$xj, v32i8:$xk), + (XVPICKOD_B v32i8:$xj, v32i8:$xk)>; +def : Pat<(loongarch_vpickod v16i16:$xj, v16i16:$xk), + (XVPICKOD_H v16i16:$xj, v16i16:$xk)>; +def : Pat<(loongarch_vpickod v8i32:$xj, v8i32:$xk), + (XVPICKOD_W v8i32:$xj, v8i32:$xk)>; +def : Pat<(loongarch_vpickod v4i64:$xj, v4i64:$xk), + (XVPICKOD_D v4i64:$xj, v4i64:$xk)>; +def : Pat<(loongarch_vpickod v8f32:$xj, v8f32:$xk), + (XVPICKOD_W v8f32:$xj, v8f32:$xk)>; +def : Pat<(loongarch_vpickod v4f64:$xj, v4f64:$xk), + (XVPICKOD_D v4f64:$xj, v4f64:$xk)>; + +// XVPACKEV_{B/H/W/D} +def : Pat<(loongarch_vpackev v32i8:$xj, v32i8:$xk), + (XVPACKEV_B v32i8:$xj, v32i8:$xk)>; +def : Pat<(loongarch_vpackev v16i16:$xj, v16i16:$xk), + (XVPACKEV_H v16i16:$xj, v16i16:$xk)>; +def : Pat<(loongarch_vpackev v8i32:$xj, v8i32:$xk), + (XVPACKEV_W v8i32:$xj, v8i32:$xk)>; +def : Pat<(loongarch_vpackev v4i64:$xj, v4i64:$xk), + (XVPACKEV_D v4i64:$xj, v4i64:$xk)>; +def : Pat<(loongarch_vpackev v8f32:$xj, v8f32:$xk), + (XVPACKEV_W v8f32:$xj, v8f32:$xk)>; +def : Pat<(loongarch_vpackev v4f64:$xj, v4f64:$xk), + (XVPACKEV_D v4f64:$xj, v4f64:$xk)>; + +// XVPACKOD_{B/H/W/D} +def : Pat<(loongarch_vpackod v32i8:$xj, v32i8:$xk), + (XVPACKOD_B v32i8:$xj, v32i8:$xk)>; +def : Pat<(loongarch_vpackod v16i16:$xj, v16i16:$xk), + (XVPACKOD_H v16i16:$xj, v16i16:$xk)>; +def : Pat<(loongarch_vpackod v8i32:$xj, v8i32:$xk), + (XVPACKOD_W v8i32:$xj, v8i32:$xk)>; +def : Pat<(loongarch_vpackod v4i64:$xj, v4i64:$xk), + (XVPACKOD_D v4i64:$xj, v4i64:$xk)>; +def : Pat<(loongarch_vpackod v8f32:$xj, v8f32:$xk), + (XVPACKOD_W v8f32:$xj, v8f32:$xk)>; +def : Pat<(loongarch_vpackod v4f64:$xj, v4f64:$xk), + (XVPACKOD_D v4f64:$xj, v4f64:$xk)>; + +// XVILVL_{B/H/W/D} +def : Pat<(loongarch_vilvl v32i8:$xj, v32i8:$xk), + (XVILVL_B v32i8:$xj, v32i8:$xk)>; +def : Pat<(loongarch_vilvl v16i16:$xj, v16i16:$xk), + (XVILVL_H v16i16:$xj, v16i16:$xk)>; +def : Pat<(loongarch_vilvl v8i32:$xj, v8i32:$xk), + (XVILVL_W v8i32:$xj, v8i32:$xk)>; +def : Pat<(loongarch_vilvl v4i64:$xj, v4i64:$xk), + (XVILVL_D v4i64:$xj, v4i64:$xk)>; +def : Pat<(loongarch_vilvl v8f32:$xj, v8f32:$xk), + (XVILVL_W v8f32:$xj, v8f32:$xk)>; +def : Pat<(loongarch_vilvl v4f64:$xj, v4f64:$xk), + (XVILVL_D v4f64:$xj, v4f64:$xk)>; + +// XVILVH_{B/H/W/D} +def : Pat<(loongarch_vilvh v32i8:$xj, v32i8:$xk), + (XVILVH_B v32i8:$xj, v32i8:$xk)>; +def : Pat<(loongarch_vilvh v16i16:$xj, v16i16:$xk), + (XVILVH_H v16i16:$xj, v16i16:$xk)>; +def : Pat<(loongarch_vilvh v8i32:$xj, v8i32:$xk), + (XVILVH_W v8i32:$xj, v8i32:$xk)>; +def : Pat<(loongarch_vilvh v4i64:$xj, v4i64:$xk), + (XVILVH_D v4i64:$xj, v4i64:$xk)>; +def : Pat<(loongarch_vilvh v8f32:$xj, v8f32:$xk), + (XVILVH_W v8f32:$xj, v8f32:$xk)>; +def : Pat<(loongarch_vilvh v4f64:$xj, v4f64:$xk), + (XVILVH_D v4f64:$xj, v4f64:$xk)>; + +// XVSHUF4I_{B/H/W} +def : Pat<(loongarch_vshuf4i v32i8:$xj, immZExt8:$ui8), + (XVSHUF4I_B v32i8:$xj, immZExt8:$ui8)>; +def : Pat<(loongarch_vshuf4i v16i16:$xj, immZExt8:$ui8), + (XVSHUF4I_H v16i16:$xj, immZExt8:$ui8)>; +def : Pat<(loongarch_vshuf4i v8i32:$xj, immZExt8:$ui8), + (XVSHUF4I_W v8i32:$xj, immZExt8:$ui8)>; +def : Pat<(loongarch_vshuf4i v8f32:$xj, immZExt8:$ui8), + (XVSHUF4I_W v8f32:$xj, immZExt8:$ui8)>; + +// XVREPL128VEI_{B/H/W/D} +def : Pat<(loongarch_vreplvei v32i8:$xj, 
immZExt4:$ui4),
+          (XVREPL128VEI_B v32i8:$xj, immZExt4:$ui4)>;
+def : Pat<(loongarch_vreplvei v16i16:$xj, immZExt3:$ui3),
+          (XVREPL128VEI_H v16i16:$xj, immZExt3:$ui3)>;
+def : Pat<(loongarch_vreplvei v8i32:$xj, immZExt2:$ui2),
+          (XVREPL128VEI_W v8i32:$xj, immZExt2:$ui2)>;
+def : Pat<(loongarch_vreplvei v4i64:$xj, immZExt1:$ui1),
+          (XVREPL128VEI_D v4i64:$xj, immZExt1:$ui1)>;
+def : Pat<(loongarch_vreplvei v8f32:$xj, immZExt2:$ui2),
+          (XVREPL128VEI_W v8f32:$xj, immZExt2:$ui2)>;
+def : Pat<(loongarch_vreplvei v4f64:$xj, immZExt1:$ui1),
+          (XVREPL128VEI_D v4f64:$xj, immZExt1:$ui1)>;
+
+// XVPERMI_D
+def : Pat<(loongarch_xvpermi v4i64:$xj, immZExt8:$ui8),
+          (XVPERMI_D v4i64:$xj, immZExt8:$ui8)>;
+def : Pat<(loongarch_xvpermi v4f64:$xj, immZExt8:$ui8),
+          (XVPERMI_D v4f64:$xj, immZExt8:$ui8)>;
+
+// XVREPLVE0_{W/D}
+def : Pat<(lasxsplatf32 FPR32:$fj),
+          (XVREPLVE0_W (SUBREG_TO_REG (i64 0), FPR32:$fj, sub_32))>;
+def : Pat<(lasxsplatf64 FPR64:$fj),
+          (XVREPLVE0_D (SUBREG_TO_REG (i64 0), FPR64:$fj, sub_64))>;
+
+// Loads/Stores
+foreach vt = [v32i8, v16i16, v8i32, v4i64, v8f32, v4f64] in {
+  defm : LdPat<load, XVLD, vt>;
+  def : RegRegLdPat<load, XVLDX, vt>;
+  defm : StPat<store, XVST, LASX256, vt>;
+  def : RegRegStPat<store, XVSTX, LASX256, vt>;
+}
+
+// Vector extraction with constant index.
+def : Pat<(i64 (vector_extract v32i8:$xj, uimm4:$imm)),
+          (VPICKVE2GR_B (EXTRACT_SUBREG v32i8:$xj, sub_128), uimm4:$imm)>;
+def : Pat<(i64 (vector_extract v16i16:$xj, uimm3:$imm)),
+          (VPICKVE2GR_H (EXTRACT_SUBREG v16i16:$xj, sub_128), uimm3:$imm)>;
+def : Pat<(i64 (vector_extract v8i32:$xj, uimm3:$imm)),
+          (XVPICKVE2GR_W v8i32:$xj, uimm3:$imm)>;
+def : Pat<(i64 (vector_extract v4i64:$xj, uimm2:$imm)),
+          (XVPICKVE2GR_D v4i64:$xj, uimm2:$imm)>;
+def : Pat<(f32 (vector_extract v8f32:$xj, uimm3:$imm)),
+          (MOVGR2FR_W (XVPICKVE2GR_W v8f32:$xj, uimm3:$imm))>;
+def : Pat<(f64 (vector_extract v4f64:$xj, uimm2:$imm)),
+          (MOVGR2FR_D (XVPICKVE2GR_D v4f64:$xj, uimm2:$imm))>;
+
+// vselect
+def : Pat<(v32i8 (vselect LASX256:$xd, (v32i8 (SplatPat_uimm8 uimm8:$imm)),
+                          LASX256:$xj)),
+          (XVBITSELI_B LASX256:$xd, LASX256:$xj, uimm8:$imm)>;
+foreach vt = [v32i8, v16i16, v8i32, v4i64, v8f32, v4f64] in
+  def : Pat<(vt (vselect LASX256:$xa, LASX256:$xk, LASX256:$xj)),
+            (XVBITSEL_V LASX256:$xj, LASX256:$xk, LASX256:$xa)>;
+
+// fneg
+def : Pat<(fneg (v8f32 LASX256:$xj)), (XVBITREVI_W LASX256:$xj, 31)>;
+def : Pat<(fneg (v4f64 LASX256:$xj)), (XVBITREVI_D LASX256:$xj, 63)>;
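// A sketch for illustration (not part of the patch): note the operand order
// in the vselect mapping above. ISD::VSELECT returns its second operand
// where the mask is true, while xvbitsel.v selects $xk where $xa is set, so
//   vselect $xa, $xk, $xj  -->  XVBITSEL_V $xj, $xk, $xa
// with the false operand moved into the first slot.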
+// XVFTINTRZ_{W_SU/L_DU}
+def : Pat<(v8i32 (fp_to_uint v8f32:$vj)), (XVFTINTRZ_WU_S v8f32:$vj)>;
+def : Pat<(v4i64 (fp_to_uint v4f64:$vj)), (XVFTINTRZ_LU_D v4f64:$vj)>;
+def : Pat<(v4i64 (fp_to_uint v4f32:$vj)),
+          (VEXT2XV_DU_WU (SUBREG_TO_REG (i64 0), (VFTINTRZ_WU_S v4f32:$vj),
+                                        sub_128))>;
+def : Pat<(v4i32 (fp_to_uint (v4f64 LASX256:$vj))),
+          (EXTRACT_SUBREG (XVFTINTRZ_W_S (XVFCVT_S_D (XVPERMI_D v4f64:$vj, 238),
+                                                     v4f64:$vj)),
+                          sub_128)>;
+
+} // Predicates = [HasExtLASX]
+
+/// Intrinsic pattern
+
+class deriveLASXIntrinsic<string Inst> {
+  Intrinsic ret = !cast<Intrinsic>(!tolower("int_loongarch_lasx_"#Inst));
+}
+
+let Predicates = [HasExtLASX] in {
+
+// vty: v32i8/v16i16/v8i32/v4i64
+// Pat<(Intrinsic vty:$xj, vty:$xk),
+//     (LAInst vty:$xj, vty:$xk)>;
+foreach Inst = ["XVSADD_B", "XVSADD_BU", "XVSSUB_B", "XVSSUB_BU",
+                "XVHADDW_H_B", "XVHADDW_HU_BU", "XVHSUBW_H_B", "XVHSUBW_HU_BU",
+                "XVADDWEV_H_B", "XVADDWOD_H_B", "XVSUBWEV_H_B", "XVSUBWOD_H_B",
+                "XVADDWEV_H_BU", "XVADDWOD_H_BU", "XVSUBWEV_H_BU", "XVSUBWOD_H_BU",
+                "XVADDWEV_H_BU_B", "XVADDWOD_H_BU_B",
+                "XVAVG_B", "XVAVG_BU", "XVAVGR_B", "XVAVGR_BU",
+                "XVABSD_B", "XVABSD_BU", "XVADDA_B", "XVMUH_B", "XVMUH_BU",
+                "XVMULWEV_H_B", "XVMULWOD_H_B", "XVMULWEV_H_BU", "XVMULWOD_H_BU",
+                "XVMULWEV_H_BU_B", "XVMULWOD_H_BU_B", "XVSIGNCOV_B",
+                "XVANDN_V", "XVORN_V", "XVROTR_B", "XVSRLR_B", "XVSRAR_B",
+                "XVSEQ_B", "XVSLE_B", "XVSLE_BU", "XVSLT_B", "XVSLT_BU",
+                "XVPACKEV_B", "XVPACKOD_B", "XVPICKEV_B", "XVPICKOD_B",
+                "XVILVL_B", "XVILVH_B"] in
+  def : Pat<(deriveLASXIntrinsic<Inst>.ret
+                (v32i8 LASX256:$xj), (v32i8 LASX256:$xk)),
+            (!cast<LAInst>(Inst) LASX256:$xj, LASX256:$xk)>;
+foreach Inst = ["XVSADD_H", "XVSADD_HU", "XVSSUB_H", "XVSSUB_HU",
+                "XVHADDW_W_H", "XVHADDW_WU_HU", "XVHSUBW_W_H", "XVHSUBW_WU_HU",
+                "XVADDWEV_W_H", "XVADDWOD_W_H", "XVSUBWEV_W_H", "XVSUBWOD_W_H",
+                "XVADDWEV_W_HU", "XVADDWOD_W_HU", "XVSUBWEV_W_HU", "XVSUBWOD_W_HU",
+                "XVADDWEV_W_HU_H", "XVADDWOD_W_HU_H",
+                "XVAVG_H", "XVAVG_HU", "XVAVGR_H", "XVAVGR_HU",
+                "XVABSD_H", "XVABSD_HU", "XVADDA_H", "XVMUH_H", "XVMUH_HU",
+                "XVMULWEV_W_H", "XVMULWOD_W_H", "XVMULWEV_W_HU", "XVMULWOD_W_HU",
+                "XVMULWEV_W_HU_H", "XVMULWOD_W_HU_H", "XVSIGNCOV_H", "XVROTR_H",
+                "XVSRLR_H", "XVSRAR_H", "XVSRLN_B_H", "XVSRAN_B_H", "XVSRLRN_B_H",
+                "XVSRARN_B_H", "XVSSRLN_B_H", "XVSSRAN_B_H", "XVSSRLN_BU_H",
+                "XVSSRAN_BU_H", "XVSSRLRN_B_H", "XVSSRARN_B_H", "XVSSRLRN_BU_H",
+                "XVSSRARN_BU_H",
+                "XVSEQ_H", "XVSLE_H", "XVSLE_HU", "XVSLT_H", "XVSLT_HU",
+                "XVPACKEV_H", "XVPACKOD_H", "XVPICKEV_H", "XVPICKOD_H",
+                "XVILVL_H", "XVILVH_H"] in
+  def : Pat<(deriveLASXIntrinsic<Inst>.ret
+                (v16i16 LASX256:$xj), (v16i16 LASX256:$xk)),
+            (!cast<LAInst>(Inst) LASX256:$xj, LASX256:$xk)>;
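+
+// Illustrative sketch of one iteration of the foreach above (the loop
+// already generates it; this is not an extra definition): for
+// Inst = "XVSADD_H", deriveLASXIntrinsic<"XVSADD_H">.ret resolves to
+// int_loongarch_lasx_xvsadd_h, so the emitted pattern is equivalent to
+//   def : Pat<(int_loongarch_lasx_xvsadd_h
+//                 (v16i16 LASX256:$xj), (v16i16 LASX256:$xk)),
+//             (XVSADD_H LASX256:$xj, LASX256:$xk)>;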
"XVSSRLRN_H_W", "XVSSRARN_H_W", "XVSSRLRN_HU_W", + "XVSSRARN_HU_W", + "XVSEQ_W", "XVSLE_W", "XVSLE_WU", "XVSLT_W", "XVSLT_WU", + "XVPACKEV_W", "XVPACKOD_W", "XVPICKEV_W", "XVPICKOD_W", + "XVILVL_W", "XVILVH_W", "XVPERM_W"] in + def : Pat<(deriveLASXIntrinsic.ret + (v8i32 LASX256:$xj), (v8i32 LASX256:$xk)), + (!cast(Inst) LASX256:$xj, LASX256:$xk)>; +foreach Inst = ["XVADD_Q", "XVSUB_Q", + "XVSADD_D", "XVSADD_DU", "XVSSUB_D", "XVSSUB_DU", + "XVHADDW_Q_D", "XVHADDW_QU_DU", "XVHSUBW_Q_D", "XVHSUBW_QU_DU", + "XVADDWEV_Q_D", "XVADDWOD_Q_D", "XVSUBWEV_Q_D", "XVSUBWOD_Q_D", + "XVADDWEV_Q_DU", "XVADDWOD_Q_DU", "XVSUBWEV_Q_DU", "XVSUBWOD_Q_DU", + "XVADDWEV_Q_DU_D", "XVADDWOD_Q_DU_D", + "XVAVG_D", "XVAVG_DU", "XVAVGR_D", "XVAVGR_DU", + "XVABSD_D", "XVABSD_DU", "XVADDA_D", "XVMUH_D", "XVMUH_DU", + "XVMULWEV_Q_D", "XVMULWOD_Q_D", "XVMULWEV_Q_DU", "XVMULWOD_Q_DU", + "XVMULWEV_Q_DU_D", "XVMULWOD_Q_DU_D", "XVSIGNCOV_D", "XVROTR_D", + "XVSRLR_D", "XVSRAR_D", "XVSRLN_W_D", "XVSRAN_W_D", "XVSRLRN_W_D", + "XVSRARN_W_D", "XVSSRLN_W_D", "XVSSRAN_W_D", "XVSSRLN_WU_D", + "XVSSRAN_WU_D", "XVSSRLRN_W_D", "XVSSRARN_W_D", "XVSSRLRN_WU_D", + "XVSSRARN_WU_D", "XVFFINT_S_L", + "XVSEQ_D", "XVSLE_D", "XVSLE_DU", "XVSLT_D", "XVSLT_DU", + "XVPACKEV_D", "XVPACKOD_D", "XVPICKEV_D", "XVPICKOD_D", + "XVILVL_D", "XVILVH_D"] in + def : Pat<(deriveLASXIntrinsic.ret + (v4i64 LASX256:$xj), (v4i64 LASX256:$xk)), + (!cast(Inst) LASX256:$xj, LASX256:$xk)>; + +// vty: v32i8/v16i16/v8i32/v4i64 +// Pat<(Intrinsic vty:$xd, vty:$xj, vty:$xk), +// (LAInst vty:$xd, vty:$xj, vty:$xk)>; +foreach Inst = ["XVMADDWEV_H_B", "XVMADDWOD_H_B", "XVMADDWEV_H_BU", + "XVMADDWOD_H_BU", "XVMADDWEV_H_BU_B", "XVMADDWOD_H_BU_B"] in + def : Pat<(deriveLASXIntrinsic.ret + (v16i16 LASX256:$xd), (v32i8 LASX256:$xj), (v32i8 LASX256:$xk)), + (!cast(Inst) LASX256:$xd, LASX256:$xj, LASX256:$xk)>; +foreach Inst = ["XVMADDWEV_W_H", "XVMADDWOD_W_H", "XVMADDWEV_W_HU", + "XVMADDWOD_W_HU", "XVMADDWEV_W_HU_H", "XVMADDWOD_W_HU_H"] in + def : Pat<(deriveLASXIntrinsic.ret + (v8i32 LASX256:$xd), (v16i16 LASX256:$xj), (v16i16 LASX256:$xk)), + (!cast(Inst) LASX256:$xd, LASX256:$xj, LASX256:$xk)>; +foreach Inst = ["XVMADDWEV_D_W", "XVMADDWOD_D_W", "XVMADDWEV_D_WU", + "XVMADDWOD_D_WU", "XVMADDWEV_D_WU_W", "XVMADDWOD_D_WU_W"] in + def : Pat<(deriveLASXIntrinsic.ret + (v4i64 LASX256:$xd), (v8i32 LASX256:$xj), (v8i32 LASX256:$xk)), + (!cast(Inst) LASX256:$xd, LASX256:$xj, LASX256:$xk)>; +foreach Inst = ["XVMADDWEV_Q_D", "XVMADDWOD_Q_D", "XVMADDWEV_Q_DU", + "XVMADDWOD_Q_DU", "XVMADDWEV_Q_DU_D", "XVMADDWOD_Q_DU_D"] in + def : Pat<(deriveLASXIntrinsic.ret + (v4i64 LASX256:$xd), (v4i64 LASX256:$xj), (v4i64 LASX256:$xk)), + (!cast(Inst) LASX256:$xd, LASX256:$xj, LASX256:$xk)>; + +// vty: v32i8/v16i16/v8i32/v4i64 +// Pat<(Intrinsic vty:$xj), +// (LAInst vty:$xj)>; +foreach Inst = ["XVEXTH_H_B", "XVEXTH_HU_BU", + "XVMSKLTZ_B", "XVMSKGEZ_B", "XVMSKNZ_B", + "XVCLO_B", "VEXT2XV_H_B", "VEXT2XV_HU_BU", + "VEXT2XV_W_B", "VEXT2XV_WU_BU", "VEXT2XV_D_B", + "VEXT2XV_DU_BU", "XVREPLVE0_B", "XVREPLVE0_Q"] in + def : Pat<(deriveLASXIntrinsic.ret (v32i8 LASX256:$xj)), + (!cast(Inst) LASX256:$xj)>; +foreach Inst = ["XVEXTH_W_H", "XVEXTH_WU_HU", "XVMSKLTZ_H", + "XVCLO_H", "XVFCVTL_S_H", "XVFCVTH_S_H", + "VEXT2XV_W_H", "VEXT2XV_WU_HU", "VEXT2XV_D_H", + "VEXT2XV_DU_HU", "XVREPLVE0_H"] in + def : Pat<(deriveLASXIntrinsic.ret (v16i16 LASX256:$xj)), + (!cast(Inst) LASX256:$xj)>; +foreach Inst = ["XVEXTH_D_W", "XVEXTH_DU_WU", "XVMSKLTZ_W", + "XVCLO_W", "XVFFINT_S_W", "XVFFINT_S_WU", + "XVFFINTL_D_W", 
"XVFFINTH_D_W", + "VEXT2XV_D_W", "VEXT2XV_DU_WU", "XVREPLVE0_W"] in + def : Pat<(deriveLASXIntrinsic.ret (v8i32 LASX256:$xj)), + (!cast(Inst) LASX256:$xj)>; +foreach Inst = ["XVEXTH_Q_D", "XVEXTH_QU_DU", "XVMSKLTZ_D", + "XVEXTL_Q_D", "XVEXTL_QU_DU", + "XVCLO_D", "XVFFINT_D_L", "XVFFINT_D_LU", + "XVREPLVE0_D"] in + def : Pat<(deriveLASXIntrinsic.ret (v4i64 LASX256:$xj)), + (!cast(Inst) LASX256:$xj)>; + +// Pat<(Intrinsic timm:$imm) +// (LAInst timm:$imm)>; +def : Pat<(int_loongarch_lasx_xvldi timm:$imm), + (XVLDI (to_valid_timm timm:$imm))>; +foreach Inst = ["XVREPLI_B", "XVREPLI_H", "XVREPLI_W", "XVREPLI_D"] in + def : Pat<(deriveLASXIntrinsic.ret timm:$imm), + (!cast("Pseudo"#Inst) (to_valid_timm timm:$imm))>; + +// vty: v32i8/v16i16/v8i32/v4i64 +// Pat<(Intrinsic vty:$xj, timm:$imm) +// (LAInst vty:$xj, timm:$imm)>; +foreach Inst = ["XVSAT_B", "XVSAT_BU", "XVNORI_B", "XVROTRI_B", "XVSLLWIL_H_B", + "XVSLLWIL_HU_BU", "XVSRLRI_B", "XVSRARI_B", + "XVSEQI_B", "XVSLEI_B", "XVSLEI_BU", "XVSLTI_B", "XVSLTI_BU", + "XVREPL128VEI_B", "XVBSLL_V", "XVBSRL_V", "XVSHUF4I_B"] in + def : Pat<(deriveLASXIntrinsic.ret (v32i8 LASX256:$xj), timm:$imm), + (!cast(Inst) LASX256:$xj, (to_valid_timm timm:$imm))>; +foreach Inst = ["XVSAT_H", "XVSAT_HU", "XVROTRI_H", "XVSLLWIL_W_H", + "XVSLLWIL_WU_HU", "XVSRLRI_H", "XVSRARI_H", + "XVSEQI_H", "XVSLEI_H", "XVSLEI_HU", "XVSLTI_H", "XVSLTI_HU", + "XVREPL128VEI_H", "XVSHUF4I_H"] in + def : Pat<(deriveLASXIntrinsic.ret (v16i16 LASX256:$xj), timm:$imm), + (!cast(Inst) LASX256:$xj, (to_valid_timm timm:$imm))>; +foreach Inst = ["XVSAT_W", "XVSAT_WU", "XVROTRI_W", "XVSLLWIL_D_W", + "XVSLLWIL_DU_WU", "XVSRLRI_W", "XVSRARI_W", + "XVSEQI_W", "XVSLEI_W", "XVSLEI_WU", "XVSLTI_W", "XVSLTI_WU", + "XVREPL128VEI_W", "XVSHUF4I_W", "XVPICKVE_W"] in + def : Pat<(deriveLASXIntrinsic.ret (v8i32 LASX256:$xj), timm:$imm), + (!cast(Inst) LASX256:$xj, (to_valid_timm timm:$imm))>; +foreach Inst = ["XVSAT_D", "XVSAT_DU", "XVROTRI_D", "XVSRLRI_D", "XVSRARI_D", + "XVSEQI_D", "XVSLEI_D", "XVSLEI_DU", "XVSLTI_D", "XVSLTI_DU", + "XVPICKVE2GR_D", "XVPICKVE2GR_DU", + "XVREPL128VEI_D", "XVPERMI_D", "XVPICKVE_D"] in + def : Pat<(deriveLASXIntrinsic.ret (v4i64 LASX256:$xj), timm:$imm), + (!cast(Inst) LASX256:$xj, (to_valid_timm timm:$imm))>; + +// vty: v32i8/v16i16/v8i32/v4i64 +// Pat<(Intrinsic vty:$xd, vty:$xj, timm:$imm) +// (LAInst vty:$xd, vty:$xj, timm:$imm)>; +foreach Inst = ["XVSRLNI_B_H", "XVSRANI_B_H", "XVSRLRNI_B_H", "XVSRARNI_B_H", + "XVSSRLNI_B_H", "XVSSRANI_B_H", "XVSSRLNI_BU_H", "XVSSRANI_BU_H", + "XVSSRLRNI_B_H", "XVSSRARNI_B_H", "XVSSRLRNI_BU_H", "XVSSRARNI_BU_H", + "XVFRSTPI_B", "XVBITSELI_B", "XVEXTRINS_B", "XVPERMI_Q"] in + def : Pat<(deriveLASXIntrinsic.ret + (v32i8 LASX256:$xd), (v32i8 LASX256:$xj), timm:$imm), + (!cast(Inst) LASX256:$xd, LASX256:$xj, + (to_valid_timm timm:$imm))>; +foreach Inst = ["XVSRLNI_H_W", "XVSRANI_H_W", "XVSRLRNI_H_W", "XVSRARNI_H_W", + "XVSSRLNI_H_W", "XVSSRANI_H_W", "XVSSRLNI_HU_W", "XVSSRANI_HU_W", + "XVSSRLRNI_H_W", "XVSSRARNI_H_W", "XVSSRLRNI_HU_W", "XVSSRARNI_HU_W", + "XVFRSTPI_H", "XVEXTRINS_H"] in + def : Pat<(deriveLASXIntrinsic.ret + (v16i16 LASX256:$xd), (v16i16 LASX256:$xj), timm:$imm), + (!cast(Inst) LASX256:$xd, LASX256:$xj, + (to_valid_timm timm:$imm))>; +foreach Inst = ["XVSRLNI_W_D", "XVSRANI_W_D", "XVSRLRNI_W_D", "XVSRARNI_W_D", + "XVSSRLNI_W_D", "XVSSRANI_W_D", "XVSSRLNI_WU_D", "XVSSRANI_WU_D", + "XVSSRLRNI_W_D", "XVSSRARNI_W_D", "XVSSRLRNI_WU_D", "XVSSRARNI_WU_D", + "XVPERMI_W", "XVEXTRINS_W", "XVINSVE0_W"] in + def : 
Pat<(deriveLASXIntrinsic.ret + (v8i32 LASX256:$xd), (v8i32 LASX256:$xj), timm:$imm), + (!cast(Inst) LASX256:$xd, LASX256:$xj, + (to_valid_timm timm:$imm))>; +foreach Inst = ["XVSRLNI_D_Q", "XVSRANI_D_Q", "XVSRLRNI_D_Q", "XVSRARNI_D_Q", + "XVSSRLNI_D_Q", "XVSSRANI_D_Q", "XVSSRLNI_DU_Q", "XVSSRANI_DU_Q", + "XVSSRLRNI_D_Q", "XVSSRARNI_D_Q", "XVSSRLRNI_DU_Q", "XVSSRARNI_DU_Q", + "XVSHUF4I_D", "XVEXTRINS_D", "XVINSVE0_D"] in + def : Pat<(deriveLASXIntrinsic.ret + (v4i64 LASX256:$xd), (v4i64 LASX256:$xj), timm:$imm), + (!cast(Inst) LASX256:$xd, LASX256:$xj, + (to_valid_timm timm:$imm))>; + +// vty: v32i8/v16i16/v8i32/v4i64 +// Pat<(Intrinsic vty:$xd, vty:$xj, vty:$xk), +// (LAInst vty:$xd, vty:$xj, vty:$xk)>; +foreach Inst = ["XVFRSTP_B", "XVBITSEL_V", "XVSHUF_B"] in + def : Pat<(deriveLASXIntrinsic.ret + (v32i8 LASX256:$xd), (v32i8 LASX256:$xj), (v32i8 LASX256:$xk)), + (!cast(Inst) LASX256:$xd, LASX256:$xj, LASX256:$xk)>; +foreach Inst = ["XVFRSTP_H", "XVSHUF_H"] in + def : Pat<(deriveLASXIntrinsic.ret + (v16i16 LASX256:$xd), (v16i16 LASX256:$xj), (v16i16 LASX256:$xk)), + (!cast(Inst) LASX256:$xd, LASX256:$xj, LASX256:$xk)>; +def : Pat<(int_loongarch_lasx_xvshuf_w (v8i32 LASX256:$xd), (v8i32 LASX256:$xj), + (v8i32 LASX256:$xk)), + (XVSHUF_W LASX256:$xd, LASX256:$xj, LASX256:$xk)>; +def : Pat<(int_loongarch_lasx_xvshuf_d (v4i64 LASX256:$xd), (v4i64 LASX256:$xj), + (v4i64 LASX256:$xk)), + (XVSHUF_D LASX256:$xd, LASX256:$xj, LASX256:$xk)>; + +// vty: v8f32/v4f64 +// Pat<(Intrinsic vty:$xj, vty:$xk, vty:$xa), +// (LAInst vty:$xj, vty:$xk, vty:$xa)>; +foreach Inst = ["XVFMSUB_S", "XVFNMADD_S", "XVFNMSUB_S"] in + def : Pat<(deriveLASXIntrinsic.ret + (v8f32 LASX256:$xj), (v8f32 LASX256:$xk), (v8f32 LASX256:$xa)), + (!cast(Inst) LASX256:$xj, LASX256:$xk, LASX256:$xa)>; +foreach Inst = ["XVFMSUB_D", "XVFNMADD_D", "XVFNMSUB_D"] in + def : Pat<(deriveLASXIntrinsic.ret + (v4f64 LASX256:$xj), (v4f64 LASX256:$xk), (v4f64 LASX256:$xa)), + (!cast(Inst) LASX256:$xj, LASX256:$xk, LASX256:$xa)>; + +// vty: v8f32/v4f64 +// Pat<(Intrinsic vty:$xj, vty:$xk), +// (LAInst vty:$xj, vty:$xk)>; +foreach Inst = ["XVFMAX_S", "XVFMIN_S", "XVFMAXA_S", "XVFMINA_S", "XVFCVT_H_S", + "XVFCMP_CAF_S", "XVFCMP_CUN_S", "XVFCMP_CEQ_S", "XVFCMP_CUEQ_S", + "XVFCMP_CLT_S", "XVFCMP_CULT_S", "XVFCMP_CLE_S", "XVFCMP_CULE_S", + "XVFCMP_CNE_S", "XVFCMP_COR_S", "XVFCMP_CUNE_S", + "XVFCMP_SAF_S", "XVFCMP_SUN_S", "XVFCMP_SEQ_S", "XVFCMP_SUEQ_S", + "XVFCMP_SLT_S", "XVFCMP_SULT_S", "XVFCMP_SLE_S", "XVFCMP_SULE_S", + "XVFCMP_SNE_S", "XVFCMP_SOR_S", "XVFCMP_SUNE_S"] in + def : Pat<(deriveLASXIntrinsic.ret + (v8f32 LASX256:$xj), (v8f32 LASX256:$xk)), + (!cast(Inst) LASX256:$xj, LASX256:$xk)>; +foreach Inst = ["XVFMAX_D", "XVFMIN_D", "XVFMAXA_D", "XVFMINA_D", "XVFCVT_S_D", + "XVFTINTRNE_W_D", "XVFTINTRZ_W_D", "XVFTINTRP_W_D", "XVFTINTRM_W_D", + "XVFTINT_W_D", + "XVFCMP_CAF_D", "XVFCMP_CUN_D", "XVFCMP_CEQ_D", "XVFCMP_CUEQ_D", + "XVFCMP_CLT_D", "XVFCMP_CULT_D", "XVFCMP_CLE_D", "XVFCMP_CULE_D", + "XVFCMP_CNE_D", "XVFCMP_COR_D", "XVFCMP_CUNE_D", + "XVFCMP_SAF_D", "XVFCMP_SUN_D", "XVFCMP_SEQ_D", "XVFCMP_SUEQ_D", + "XVFCMP_SLT_D", "XVFCMP_SULT_D", "XVFCMP_SLE_D", "XVFCMP_SULE_D", + "XVFCMP_SNE_D", "XVFCMP_SOR_D", "XVFCMP_SUNE_D"] in + def : Pat<(deriveLASXIntrinsic.ret + (v4f64 LASX256:$xj), (v4f64 LASX256:$xk)), + (!cast(Inst) LASX256:$xj, LASX256:$xk)>; + +// vty: v8f32/v4f64 +// Pat<(Intrinsic vty:$xj), +// (LAInst vty:$xj)>; +foreach Inst = ["XVFLOGB_S", "XVFCLASS_S", "XVFSQRT_S", "XVFRECIP_S", "XVFRSQRT_S", + "XVFRINT_S", "XVFCVTL_D_S", "XVFCVTH_D_S", 
+ "XVFRINTRNE_S", "XVFRINTRZ_S", "XVFRINTRP_S", "XVFRINTRM_S", + "XVFTINTRNE_W_S", "XVFTINTRZ_W_S", "XVFTINTRP_W_S", "XVFTINTRM_W_S", + "XVFTINT_W_S", "XVFTINTRZ_WU_S", "XVFTINT_WU_S", + "XVFTINTRNEL_L_S", "XVFTINTRNEH_L_S", "XVFTINTRZL_L_S", + "XVFTINTRZH_L_S", "XVFTINTRPL_L_S", "XVFTINTRPH_L_S", + "XVFTINTRML_L_S", "XVFTINTRMH_L_S", "XVFTINTL_L_S", + "XVFTINTH_L_S"] in + def : Pat<(deriveLASXIntrinsic.ret (v8f32 LASX256:$xj)), + (!cast(Inst) LASX256:$xj)>; +foreach Inst = ["XVFLOGB_D", "XVFCLASS_D", "XVFSQRT_D", "XVFRECIP_D", "XVFRSQRT_D", + "XVFRINT_D", + "XVFRINTRNE_D", "XVFRINTRZ_D", "XVFRINTRP_D", "XVFRINTRM_D", + "XVFTINTRNE_L_D", "XVFTINTRZ_L_D", "XVFTINTRP_L_D", "XVFTINTRM_L_D", + "XVFTINT_L_D", "XVFTINTRZ_LU_D", "XVFTINT_LU_D"] in + def : Pat<(deriveLASXIntrinsic.ret (v4f64 LASX256:$xj)), + (!cast(Inst) LASX256:$xj)>; + +def : Pat<(int_loongarch_lasx_xvpickve_w_f v8f32:$xj, timm:$imm), + (XVPICKVE_W v8f32:$xj, (to_valid_timm timm:$imm))>; +def : Pat<(int_loongarch_lasx_xvpickve_d_f v4f64:$xj, timm:$imm), + (XVPICKVE_D v4f64:$xj, (to_valid_timm timm:$imm))>; + +// load +def : Pat<(int_loongarch_lasx_xvld GPR:$rj, timm:$imm), + (XVLD GPR:$rj, (to_valid_timm timm:$imm))>; +def : Pat<(int_loongarch_lasx_xvldx GPR:$rj, GPR:$rk), + (XVLDX GPR:$rj, GPR:$rk)>; + +def : Pat<(int_loongarch_lasx_xvldrepl_b GPR:$rj, timm:$imm), + (XVLDREPL_B GPR:$rj, (to_valid_timm timm:$imm))>; +def : Pat<(int_loongarch_lasx_xvldrepl_h GPR:$rj, timm:$imm), + (XVLDREPL_H GPR:$rj, (to_valid_timm timm:$imm))>; +def : Pat<(int_loongarch_lasx_xvldrepl_w GPR:$rj, timm:$imm), + (XVLDREPL_W GPR:$rj, (to_valid_timm timm:$imm))>; +def : Pat<(int_loongarch_lasx_xvldrepl_d GPR:$rj, timm:$imm), + (XVLDREPL_D GPR:$rj, (to_valid_timm timm:$imm))>; + +// store +def : Pat<(int_loongarch_lasx_xvst LASX256:$xd, GPR:$rj, timm:$imm), + (XVST LASX256:$xd, GPR:$rj, (to_valid_timm timm:$imm))>; +def : Pat<(int_loongarch_lasx_xvstx LASX256:$xd, GPR:$rj, GPR:$rk), + (XVSTX LASX256:$xd, GPR:$rj, GPR:$rk)>; + +def : Pat<(int_loongarch_lasx_xvstelm_b v32i8:$xd, GPR:$rj, timm:$imm, timm:$idx), + (XVSTELM_B v32i8:$xd, GPR:$rj, (to_valid_timm timm:$imm), + (to_valid_timm timm:$idx))>; +def : Pat<(int_loongarch_lasx_xvstelm_h v16i16:$xd, GPR:$rj, timm:$imm, timm:$idx), + (XVSTELM_H v16i16:$xd, GPR:$rj, (to_valid_timm timm:$imm), + (to_valid_timm timm:$idx))>; +def : Pat<(int_loongarch_lasx_xvstelm_w v8i32:$xd, GPR:$rj, timm:$imm, timm:$idx), + (XVSTELM_W v8i32:$xd, GPR:$rj, (to_valid_timm timm:$imm), + (to_valid_timm timm:$idx))>; +def : Pat<(int_loongarch_lasx_xvstelm_d v4i64:$xd, GPR:$rj, timm:$imm, timm:$idx), + (XVSTELM_D v4i64:$xd, GPR:$rj, (to_valid_timm timm:$imm), + (to_valid_timm timm:$idx))>; + +} // Predicates = [HasExtLASX] diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrFormats.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrFormats.td new file mode 100644 index 0000000000000000000000000000000000000000..843f9cbd94e7487c88d86bbef5699c0989a6b30e --- /dev/null +++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrFormats.td @@ -0,0 +1,486 @@ +// LoongArchLSXInstrFormats.td - LoongArch LSX Instr Formats -*- tablegen -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// This file describes the LoongArch LSX instruction formats.
+//
+// opcode   - operation code.
+// vd/rd/cd - destination register operand.
+// {r/v}{j/k} - source register operand.
+// immN     - immediate data operand.
+//
+//===----------------------------------------------------------------------===//
+
+// 1RI13-type
+// <opcode | imm13 | vd>
+class Fmt1RI13_VI<bits<32> op, dag outs, dag ins, string opnstr,
+                  list<dag> pattern = []>
+    : LAInst<outs, ins, deriveInsnMnemonic<NAME>.ret, opnstr, pattern> {
+  bits<13> imm13;
+  bits<5> vd;
+
+  let Inst{31-0} = op;
+  let Inst{17-5} = imm13;
+  let Inst{4-0} = vd;
+}
+
+// 2R-type
+// <opcode | vj | vd>
+class Fmt2R_VV<bits<32> op, dag outs, dag ins, string opnstr,
+               list<dag> pattern = []>
+    : LAInst<outs, ins, deriveInsnMnemonic<NAME>.ret, opnstr, pattern> {
+  bits<5> vj;
+  bits<5> vd;
+
+  let Inst{31-0} = op;
+  let Inst{9-5} = vj;
+  let Inst{4-0} = vd;
+}
+
+// <opcode | rj | vd>
+class Fmt2R_VR<bits<32> op, dag outs, dag ins, string opnstr,
+               list<dag> pattern = []>
+    : LAInst<outs, ins, deriveInsnMnemonic<NAME>.ret, opnstr, pattern> {
+  bits<5> rj;
+  bits<5> vd;
+
+  let Inst{31-0} = op;
+  let Inst{9-5} = rj;
+  let Inst{4-0} = vd;
+}
+
+// <opcode | vj | cd>
+class Fmt2R_CV<bits<32> op, dag outs, dag ins, string opnstr,
+               list<dag> pattern = []>
+    : LAInst<outs, ins, deriveInsnMnemonic<NAME>.ret, opnstr, pattern> {
+  bits<5> vj;
+  bits<3> cd;
+
+  let Inst{31-0} = op;
+  let Inst{9-5} = vj;
+  let Inst{2-0} = cd;
+}
+
+// 2RI1-type
+// <opcode | imm1 | vj | vd>
+class Fmt2RI1_VVI<bits<32> op, dag outs, dag ins, string opnstr,
+                  list<dag> pattern = []>
+    : LAInst<outs, ins, deriveInsnMnemonic<NAME>.ret, opnstr, pattern> {
+  bits<1> imm1;
+  bits<5> vj;
+  bits<5> vd;
+
+  let Inst{31-0} = op;
+  let Inst{10} = imm1;
+  let Inst{9-5} = vj;
+  let Inst{4-0} = vd;
+}
+
+// <opcode | imm1 | rj | vd>
+class Fmt2RI1_VRI<bits<32> op, dag outs, dag ins, string opnstr,
+                  list<dag> pattern = []>
+    : LAInst<outs, ins, deriveInsnMnemonic<NAME>.ret, opnstr, pattern> {
+  bits<1> imm1;
+  bits<5> rj;
+  bits<5> vd;
+
+  let Inst{31-0} = op;
+  let Inst{10} = imm1;
+  let Inst{9-5} = rj;
+  let Inst{4-0} = vd;
+}
+
+// <opcode | imm1 | vj | rd>
+class Fmt2RI1_RVI<bits<32> op, dag outs, dag ins, string opnstr,
+                  list<dag> pattern = []>
+    : LAInst<outs, ins, deriveInsnMnemonic<NAME>.ret, opnstr, pattern> {
+  bits<1> imm1;
+  bits<5> vj;
+  bits<5> rd;
+
+  let Inst{31-0} = op;
+  let Inst{10} = imm1;
+  let Inst{9-5} = vj;
+  let Inst{4-0} = rd;
+}
+
+// 2RI2-type
+// <opcode | imm2 | vj | vd>
+class Fmt2RI2_VVI<bits<32> op, dag outs, dag ins, string opnstr,
+                  list<dag> pattern = []>
+    : LAInst<outs, ins, deriveInsnMnemonic<NAME>.ret, opnstr, pattern> {
+  bits<2> imm2;
+  bits<5> vj;
+  bits<5> vd;
+
+  let Inst{31-0} = op;
+  let Inst{11-10} = imm2;
+  let Inst{9-5} = vj;
+  let Inst{4-0} = vd;
+}
+
+// <opcode | imm2 | rj | vd>
+class Fmt2RI2_VRI<bits<32> op, dag outs, dag ins, string opnstr,
+                  list<dag> pattern = []>
+    : LAInst<outs, ins, deriveInsnMnemonic<NAME>.ret, opnstr, pattern> {
+  bits<2> imm2;
+  bits<5> rj;
+  bits<5> vd;
+
+  let Inst{31-0} = op;
+  let Inst{11-10} = imm2;
+  let Inst{9-5} = rj;
+  let Inst{4-0} = vd;
+}
+
+// <opcode | imm2 | vj | rd>
+class Fmt2RI2_RVI<bits<32> op, dag outs, dag ins, string opnstr,
+                  list<dag> pattern = []>
+    : LAInst<outs, ins, deriveInsnMnemonic<NAME>.ret, opnstr, pattern> {
+  bits<2> imm2;
+  bits<5> vj;
+  bits<5> rd;
+
+  let Inst{31-0} = op;
+  let Inst{11-10} = imm2;
+  let Inst{9-5} = vj;
+  let Inst{4-0} = rd;
+}
+
+// 2RI3-type
+// <opcode | imm3 | vj | vd>
+class Fmt2RI3_VVI<bits<32> op, dag outs, dag ins, string opnstr,
+                  list<dag> pattern = []>
+    : LAInst<outs, ins, deriveInsnMnemonic<NAME>.ret, opnstr, pattern> {
+  bits<3> imm3;
+  bits<5> vj;
+  bits<5> vd;
+
+  let Inst{31-0} = op;
+  let Inst{12-10} = imm3;
+  let Inst{9-5} = vj;
+  let Inst{4-0} = vd;
+}
+
+// <opcode | imm3 | rj | vd>
+class Fmt2RI3_VRI<bits<32> op, dag outs, dag ins, string opnstr,
+                  list<dag> pattern = []>
+    : LAInst<outs, ins, deriveInsnMnemonic<NAME>.ret, opnstr, pattern> {
+  bits<3> imm3;
+  bits<5> rj;
+  bits<5> vd;
+
+  let Inst{31-0} = op;
+  let Inst{12-10} = imm3;
+  let Inst{9-5} = rj;
+  let Inst{4-0} = vd;
+}
+
+// <opcode | imm3 | vj | rd>
+class
Fmt2RI3_RVI op, dag outs, dag ins, string opnstr, + list pattern = []> + : LAInst.ret, opnstr, pattern> { + bits<3> imm3; + bits<5> vj; + bits<5> rd; + + let Inst{31-0} = op; + let Inst{12-10} = imm3; + let Inst{9-5} = vj; + let Inst{4-0} = rd; +} + +// 2RI4-type +// +class Fmt2RI4_VVI op, dag outs, dag ins, string opnstr, + list pattern = []> + : LAInst.ret, opnstr, pattern> { + bits<4> imm4; + bits<5> vj; + bits<5> vd; + + let Inst{31-0} = op; + let Inst{13-10} = imm4; + let Inst{9-5} = vj; + let Inst{4-0} = vd; +} + +// +class Fmt2RI4_VRI op, dag outs, dag ins, string opnstr, + list pattern = []> + : LAInst.ret, opnstr, pattern> { + bits<4> imm4; + bits<5> rj; + bits<5> vd; + + let Inst{31-0} = op; + let Inst{13-10} = imm4; + let Inst{9-5} = rj; + let Inst{4-0} = vd; +} + +// +class Fmt2RI4_RVI op, dag outs, dag ins, string opnstr, + list pattern = []> + : LAInst.ret, opnstr, pattern> { + bits<4> imm4; + bits<5> vj; + bits<5> rd; + + let Inst{31-0} = op; + let Inst{13-10} = imm4; + let Inst{9-5} = vj; + let Inst{4-0} = rd; +} + +// 2RI5-type +// +class Fmt2RI5_VVI op, dag outs, dag ins, string opnstr, + list pattern = []> + : LAInst.ret, opnstr, pattern> { + bits<5> imm5; + bits<5> vj; + bits<5> vd; + + let Inst{31-0} = op; + let Inst{14-10} = imm5; + let Inst{9-5} = vj; + let Inst{4-0} = vd; +} + +// 2RI6-type +// +class Fmt2RI6_VVI op, dag outs, dag ins, string opnstr, + list pattern = []> + : LAInst.ret, opnstr, pattern> { + bits<6> imm6; + bits<5> vj; + bits<5> vd; + + let Inst{31-0} = op; + let Inst{15-10} = imm6; + let Inst{9-5} = vj; + let Inst{4-0} = vd; +} + +// 2RI7-type +// +class Fmt2RI7_VVI op, dag outs, dag ins, string opnstr, + list pattern = []> + : LAInst.ret, opnstr, pattern> { + bits<7> imm7; + bits<5> vj; + bits<5> vd; + + let Inst{31-0} = op; + let Inst{16-10} = imm7; + let Inst{9-5} = vj; + let Inst{4-0} = vd; +} + +// 2RI8-type +// +class Fmt2RI8_VVI op, dag outs, dag ins, string opnstr, + list pattern = []> + : LAInst.ret, opnstr, pattern> { + bits<8> imm8; + bits<5> vj; + bits<5> vd; + + let Inst{31-0} = op; + let Inst{17-10} = imm8; + let Inst{9-5} = vj; + let Inst{4-0} = vd; +} + +// 2RI8I1-type +// +class Fmt2RI8I1_VRII op, dag outs, dag ins, string opnstr, + list pattern = []> + : LAInst.ret, opnstr, pattern> { + bits<1> imm1; + bits<8> imm8; + bits<5> rj; + bits<5> vd; + + let Inst{31-0} = op; + let Inst{18} = imm1; + let Inst{17-10} = imm8; + let Inst{9-5} = rj; + let Inst{4-0} = vd; +} + +// 2RI8I2-type +// +class Fmt2RI8I2_VRII op, dag outs, dag ins, string opnstr, + list pattern = []> + : LAInst.ret, opnstr, pattern> { + bits<2> imm2; + bits<8> imm8; + bits<5> rj; + bits<5> vd; + + let Inst{31-0} = op; + let Inst{19-18} = imm2; + let Inst{17-10} = imm8; + let Inst{9-5} = rj; + let Inst{4-0} = vd; +} + +// 2RI8I3-type +// +class Fmt2RI8I3_VRII op, dag outs, dag ins, string opnstr, + list pattern = []> + : LAInst.ret, opnstr, pattern> { + bits<3> imm3; + bits<8> imm8; + bits<5> rj; + bits<5> vd; + + let Inst{31-0} = op; + let Inst{20-18} = imm3; + let Inst{17-10} = imm8; + let Inst{9-5} = rj; + let Inst{4-0} = vd; +} + +// 2RI8I4-type +// +class Fmt2RI8I4_VRII op, dag outs, dag ins, string opnstr, + list pattern = []> + : LAInst.ret, opnstr, pattern> { + bits<4> imm4; + bits<8> imm8; + bits<5> rj; + bits<5> vd; + + let Inst{31-0} = op; + let Inst{21-18} = imm4; + let Inst{17-10} = imm8; + let Inst{9-5} = rj; + let Inst{4-0} = vd; +} +// 2RI9-type +// +class Fmt2RI9_VRI op, dag outs, dag ins, string opnstr, + list pattern = []> + : LAInst.ret, opnstr, 
pattern> { + bits<9> imm9; + bits<5> rj; + bits<5> vd; + + let Inst{31-0} = op; + let Inst{18-10} = imm9; + let Inst{9-5} = rj; + let Inst{4-0} = vd; +} + +// 2RI10-type +// +class Fmt2RI10_VRI op, dag outs, dag ins, string opnstr, + list pattern = []> + : LAInst.ret, opnstr, pattern> { + bits<10> imm10; + bits<5> rj; + bits<5> vd; + + let Inst{31-0} = op; + let Inst{19-10} = imm10; + let Inst{9-5} = rj; + let Inst{4-0} = vd; +} + +// 2RI11-type +// +class Fmt2RI11_VRI op, dag outs, dag ins, string opnstr, + list pattern = []> + : LAInst.ret, opnstr, pattern> { + bits<11> imm11; + bits<5> rj; + bits<5> vd; + + let Inst{31-0} = op; + let Inst{20-10} = imm11; + let Inst{9-5} = rj; + let Inst{4-0} = vd; +} + +// 2RI12-type +// +class Fmt2RI12_VRI op, dag outs, dag ins, string opnstr, + list pattern = []> + : LAInst.ret, opnstr, pattern> { + bits<12> imm12; + bits<5> rj; + bits<5> vd; + + let Inst{31-0} = op; + let Inst{21-10} = imm12; + let Inst{9-5} = rj; + let Inst{4-0} = vd; +} + +// 3R-type +// +class Fmt3R_VVV op, dag outs, dag ins, string opnstr, + list pattern = []> + : LAInst.ret, opnstr, pattern> { + bits<5> vk; + bits<5> vj; + bits<5> vd; + + let Inst{31-0} = op; + let Inst{14-10} = vk; + let Inst{9-5} = vj; + let Inst{4-0} = vd; +} + +// +class Fmt3R_VVR op, dag outs, dag ins, string opnstr, + list pattern = []> + : LAInst.ret, opnstr, pattern> { + bits<5> rk; + bits<5> vj; + bits<5> vd; + + let Inst{31-0} = op; + let Inst{14-10} = rk; + let Inst{9-5} = vj; + let Inst{4-0} = vd; +} + +// +class Fmt3R_VRR op, dag outs, dag ins, string opnstr, + list pattern = []> + : LAInst.ret, opnstr, pattern> { + bits<5> rk; + bits<5> rj; + bits<5> vd; + + let Inst{31-0} = op; + let Inst{14-10} = rk; + let Inst{9-5} = rj; + let Inst{4-0} = vd; +} + +// 4R-type +// +class Fmt4R_VVVV op, dag outs, dag ins, string opnstr, + list pattern = []> + : LAInst.ret, opnstr, pattern> { + bits<5> va; + bits<5> vk; + bits<5> vj; + bits<5> vd; + + let Inst{31-0} = op; + let Inst{19-15} = va; + let Inst{14-10} = vk; + let Inst{9-5} = vj; + let Inst{4-0} = vd; +} diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td new file mode 100644 index 0000000000000000000000000000000000000000..3519fa3142c3c78bf8d39b74be390c8ca3888e36 --- /dev/null +++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td @@ -0,0 +1,2228 @@ +//===- LoongArchLSXInstrInfo.td - LoongArch LSX instructions -*- tablegen -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file describes the SIMD extension instructions. +// +//===----------------------------------------------------------------------===// + +def SDT_LoongArchVreplve : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisVec<0>, + SDTCisInt<1>, SDTCisVec<1>, + SDTCisSameAs<0, 1>, SDTCisInt<2>]>; +def SDT_LoongArchVecCond : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisVec<1>]>; + +def SDT_LoongArchVShuf : SDTypeProfile<1, 3, [SDTCisVec<0>, + SDTCisInt<1>, SDTCisVec<1>, + SDTCisSameAs<0, 2>, + SDTCisSameAs<2, 3>]>; +def SDT_LoongArchV2R : SDTypeProfile<1, 2, [SDTCisVec<0>, + SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>]>; +def SDT_loongArchV1RUimm: SDTypeProfile<1, 2, [SDTCisVec<0>, + SDTCisSameAs<0,1>, SDTCisVT<2, i64>]>; + +// Target nodes. 
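+// These nodes mirror the LoongArchISD::* opcodes produced during custom
+// lowering; the patterns later in this file select them into LSX
+// instructions. As a hedged sketch of that flow (illustration only, not a
+// pattern defined here): a node such as
+//   (loongarch_vreplve v16i8:$vj, GRLenVT:$rk)
+// carries a source vector plus a general-purpose lane index, per the
+// SDT_LoongArchVreplve constraints above, and would be selected into the
+// corresponding VREPLVE_B instruction defined below.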
+def loongarch_vreplve : SDNode<"LoongArchISD::VREPLVE", SDT_LoongArchVreplve>;
+def loongarch_vall_nonzero : SDNode<"LoongArchISD::VALL_NONZERO",
+                                    SDT_LoongArchVecCond>;
+def loongarch_vany_nonzero : SDNode<"LoongArchISD::VANY_NONZERO",
+                                    SDT_LoongArchVecCond>;
+def loongarch_vall_zero : SDNode<"LoongArchISD::VALL_ZERO",
+                                 SDT_LoongArchVecCond>;
+def loongarch_vany_zero : SDNode<"LoongArchISD::VANY_ZERO",
+                                 SDT_LoongArchVecCond>;
+
+def loongarch_vpick_sext_elt : SDNode<"LoongArchISD::VPICK_SEXT_ELT",
+                                      SDTypeProfile<1, 3, [SDTCisPtrTy<2>]>>;
+def loongarch_vpick_zext_elt : SDNode<"LoongArchISD::VPICK_ZEXT_ELT",
+                                      SDTypeProfile<1, 3, [SDTCisPtrTy<2>]>>;
+
+def loongarch_vshuf: SDNode<"LoongArchISD::VSHUF", SDT_LoongArchVShuf>;
+def loongarch_vpickev: SDNode<"LoongArchISD::VPICKEV", SDT_LoongArchV2R>;
+def loongarch_vpickod: SDNode<"LoongArchISD::VPICKOD", SDT_LoongArchV2R>;
+def loongarch_vpackev: SDNode<"LoongArchISD::VPACKEV", SDT_LoongArchV2R>;
+def loongarch_vpackod: SDNode<"LoongArchISD::VPACKOD", SDT_LoongArchV2R>;
+def loongarch_vilvl: SDNode<"LoongArchISD::VILVL", SDT_LoongArchV2R>;
+def loongarch_vilvh: SDNode<"LoongArchISD::VILVH", SDT_LoongArchV2R>;
+
+def loongarch_vshuf4i: SDNode<"LoongArchISD::VSHUF4I", SDT_loongArchV1RUimm>;
+def loongarch_vreplvei: SDNode<"LoongArchISD::VREPLVEI", SDT_loongArchV1RUimm>;
+
+def immZExt1 : ImmLeaf<GRLenVT, [{return isUInt<1>(Imm);}]>;
+def immZExt2 : ImmLeaf<GRLenVT, [{return isUInt<2>(Imm);}]>;
+def immZExt3 : ImmLeaf<GRLenVT, [{return isUInt<3>(Imm);}]>;
+def immZExt4 : ImmLeaf<GRLenVT, [{return isUInt<4>(Imm);}]>;
+def immZExt8 : ImmLeaf<GRLenVT, [{return isUInt<8>(Imm);}]>;
+
+class VecCond<SDPatternOperator OpNode, ValueType TyNode,
+              RegisterClass RC = LSX128>
+    : Pseudo<(outs GPR:$rd), (ins RC:$vj),
+             [(set GPR:$rd, (OpNode (TyNode RC:$vj)))]> {
+  let hasSideEffects = 0;
+  let mayLoad = 0;
+  let mayStore = 0;
+  let usesCustomInserter = 1;
+}
+
+def vsplat_imm_eq_1 : PatFrags<(ops), [(build_vector),
+                                       (bitconvert (v4i32 (build_vector)))], [{
+  APInt Imm;
+  EVT EltTy = N->getValueType(0).getVectorElementType();
+
+  if (N->getOpcode() == ISD::BITCAST)
+    N = N->getOperand(0).getNode();
+
+  return selectVSplat(N, Imm, EltTy.getSizeInBits()) &&
+         Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 1;
+}]>;
+
+def vsplati8_imm_eq_7 : PatFrags<(ops), [(build_vector)], [{
+  APInt Imm;
+  EVT EltTy = N->getValueType(0).getVectorElementType();
+
+  if (N->getOpcode() == ISD::BITCAST)
+    N = N->getOperand(0).getNode();
+
+  return selectVSplat(N, Imm, EltTy.getSizeInBits()) &&
+         Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 7;
+}]>;
+def vsplati16_imm_eq_15 : PatFrags<(ops), [(build_vector)], [{
+  APInt Imm;
+  EVT EltTy = N->getValueType(0).getVectorElementType();
+
+  if (N->getOpcode() == ISD::BITCAST)
+    N = N->getOperand(0).getNode();
+
+  return selectVSplat(N, Imm, EltTy.getSizeInBits()) &&
+         Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 15;
+}]>;
+def vsplati32_imm_eq_31 : PatFrags<(ops), [(build_vector)], [{
+  APInt Imm;
+  EVT EltTy = N->getValueType(0).getVectorElementType();
+
+  if (N->getOpcode() == ISD::BITCAST)
+    N = N->getOperand(0).getNode();
+
+  return selectVSplat(N, Imm, EltTy.getSizeInBits()) &&
+         Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 31;
+}]>;
+def vsplati64_imm_eq_63 : PatFrags<(ops), [(build_vector),
+                                           (bitconvert (v4i32 (build_vector)))], [{
+  APInt Imm;
+  EVT EltTy = N->getValueType(0).getVectorElementType();
+
+  if (N->getOpcode() == ISD::BITCAST)
+    N = N->getOperand(0).getNode();
+
+  return selectVSplat(N, Imm, EltTy.getSizeInBits()) &&
+         Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 63;
+}]>;
+
+def vsplatf32_fpimm_eq_1
+    : PatFrags<(ops), [(bitconvert (v4i32 (build_vector))),
+                       (bitconvert (v8i32
(build_vector)))], [{ + APInt Imm; + EVT EltTy = N->getValueType(0).getVectorElementType(); + N = N->getOperand(0).getNode(); + + return selectVSplat(N, Imm, EltTy.getSizeInBits()) && + Imm.getBitWidth() == EltTy.getSizeInBits() && + Imm == APFloat(+1.0f).bitcastToAPInt(); +}]>; +def vsplatf64_fpimm_eq_1 + : PatFrags<(ops), [(bitconvert (v2i64 (build_vector))), + (bitconvert (v4i64 (build_vector)))], [{ + APInt Imm; + EVT EltTy = N->getValueType(0).getVectorElementType(); + N = N->getOperand(0).getNode(); + + return selectVSplat(N, Imm, EltTy.getSizeInBits()) && + Imm.getBitWidth() == EltTy.getSizeInBits() && + Imm == APFloat(+1.0).bitcastToAPInt(); +}]>; + +def vsplati8imm7 : PatFrag<(ops node:$reg), + (and node:$reg, vsplati8_imm_eq_7)>; +def vsplati16imm15 : PatFrag<(ops node:$reg), + (and node:$reg, vsplati16_imm_eq_15)>; +def vsplati32imm31 : PatFrag<(ops node:$reg), + (and node:$reg, vsplati32_imm_eq_31)>; +def vsplati64imm63 : PatFrag<(ops node:$reg), + (and node:$reg, vsplati64_imm_eq_63)>; + +foreach N = [3, 4, 5, 6, 8] in + def SplatPat_uimm#N : ComplexPattern", + [build_vector, bitconvert], [], 2>; + +foreach N = [5] in + def SplatPat_simm#N : ComplexPattern", + [build_vector, bitconvert]>; + +def vsplat_uimm_inv_pow2 : ComplexPattern; + +def vsplat_uimm_pow2 : ComplexPattern; + +def muladd : PatFrag<(ops node:$vd, node:$vj, node:$vk), + (add node:$vd, (mul node:$vj, node:$vk))>; + +def mulsub : PatFrag<(ops node:$vd, node:$vj, node:$vk), + (sub node:$vd, (mul node:$vj, node:$vk))>; + +def lsxsplati8 : PatFrag<(ops node:$e0), + (v16i8 (build_vector node:$e0, node:$e0, + node:$e0, node:$e0, + node:$e0, node:$e0, + node:$e0, node:$e0, + node:$e0, node:$e0, + node:$e0, node:$e0, + node:$e0, node:$e0, + node:$e0, node:$e0))>; +def lsxsplati16 : PatFrag<(ops node:$e0), + (v8i16 (build_vector node:$e0, node:$e0, + node:$e0, node:$e0, + node:$e0, node:$e0, + node:$e0, node:$e0))>; +def lsxsplati32 : PatFrag<(ops node:$e0), + (v4i32 (build_vector node:$e0, node:$e0, + node:$e0, node:$e0))>; +def lsxsplati64 : PatFrag<(ops node:$e0), + (v2i64 (build_vector node:$e0, node:$e0))>; +def lsxsplatf32 : PatFrag<(ops node:$e0), + (v4f32 (build_vector node:$e0, node:$e0, + node:$e0, node:$e0))>; +def lsxsplatf64 : PatFrag<(ops node:$e0), + (v2f64 (build_vector node:$e0, node:$e0))>; + +def to_valid_timm : SDNodeXForm(N); + return CurDAG->getTargetConstant(CN->getSExtValue(), SDLoc(N), Subtarget->getGRLenVT()); +}]>; + +//===----------------------------------------------------------------------===// +// Instruction class templates +//===----------------------------------------------------------------------===// + +class LSX1RI13_VI op, Operand ImmOpnd = simm13> + : Fmt1RI13_VI; + +class LSX2R_VV op> + : Fmt2R_VV; + +class LSX2R_VR op> + : Fmt2R_VR; + +class LSX2R_CV op> + : Fmt2R_CV; + +class LSX2RI1_VVI op, Operand ImmOpnd = uimm1> + : Fmt2RI1_VVI; + +class LSX2RI1_RVI op, Operand ImmOpnd = uimm1> + : Fmt2RI1_RVI; + +class LSX2RI2_VVI op, Operand ImmOpnd = uimm2> + : Fmt2RI2_VVI; + +class LSX2RI2_RVI op, Operand ImmOpnd = uimm2> + : Fmt2RI2_RVI; + +class LSX2RI3_VVI op, Operand ImmOpnd = uimm3> + : Fmt2RI3_VVI; + +class LSX2RI3_RVI op, Operand ImmOpnd = uimm3> + : Fmt2RI3_RVI; + +class LSX2RI4_VVI op, Operand ImmOpnd = uimm4> + : Fmt2RI4_VVI; + +class LSX2RI4_RVI op, Operand ImmOpnd = uimm4> + : Fmt2RI4_RVI; + +class LSX2RI5_VVI op, Operand ImmOpnd = uimm5> + : Fmt2RI5_VVI; + +class LSX2RI6_VVI op, Operand ImmOpnd = uimm6> + : Fmt2RI6_VVI; + +class LSX2RI8_VVI op, Operand ImmOpnd = uimm8> + : 
Fmt2RI8_VVI; + +class LSX2RI8I1_VRII op, Operand ImmOpnd = simm8, + Operand IdxOpnd = uimm1> + : Fmt2RI8I1_VRII; +class LSX2RI8I2_VRII op, Operand ImmOpnd = simm8, + Operand IdxOpnd = uimm2> + : Fmt2RI8I2_VRII; +class LSX2RI8I3_VRII op, Operand ImmOpnd = simm8, + Operand IdxOpnd = uimm3> + : Fmt2RI8I3_VRII; +class LSX2RI8I4_VRII op, Operand ImmOpnd = simm8, + Operand IdxOpnd = uimm4> + : Fmt2RI8I4_VRII; + +class LSX3R_VVV op> + : Fmt3R_VVV; + +class LSX3R_VVR op> + : Fmt3R_VVR; + +class LSX4R_VVVV op> + : Fmt4R_VVVV; + +let Constraints = "$vd = $dst" in { + +class LSX2RI1_VVRI op, Operand ImmOpnd = uimm1> + : Fmt2RI1_VRI; +class LSX2RI2_VVRI op, Operand ImmOpnd = uimm2> + : Fmt2RI2_VRI; +class LSX2RI3_VVRI op, Operand ImmOpnd = uimm3> + : Fmt2RI3_VRI; +class LSX2RI4_VVRI op, Operand ImmOpnd = uimm4> + : Fmt2RI4_VRI; + +class LSX2RI4_VVVI op, Operand ImmOpnd = uimm4> + : Fmt2RI4_VVI; +class LSX2RI5_VVVI op, Operand ImmOpnd = uimm5> + : Fmt2RI5_VVI; +class LSX2RI6_VVVI op, Operand ImmOpnd = uimm6> + : Fmt2RI6_VVI; +class LSX2RI7_VVVI op, Operand ImmOpnd = uimm7> + : Fmt2RI7_VVI; + +class LSX2RI8_VVVI op, Operand ImmOpnd = uimm8> + : Fmt2RI8_VVI; + +class LSX3R_VVVV op> + : Fmt3R_VVV; + +} // Constraints = "$vd = $dst" + +class LSX2RI9_Load op, Operand ImmOpnd = simm9_lsl3> + : Fmt2RI9_VRI; +class LSX2RI10_Load op, Operand ImmOpnd = simm10_lsl2> + : Fmt2RI10_VRI; +class LSX2RI11_Load op, Operand ImmOpnd = simm11_lsl1> + : Fmt2RI11_VRI; +class LSX2RI12_Load op, Operand ImmOpnd = simm12> + : Fmt2RI12_VRI; +class LSX2RI12_Store op, Operand ImmOpnd = simm12> + : Fmt2RI12_VRI; + +class LSX3R_Load op> + : Fmt3R_VRR; +class LSX3R_Store op> + : Fmt3R_VRR; + +//===----------------------------------------------------------------------===// +// Instructions +//===----------------------------------------------------------------------===// + +let hasSideEffects = 0, Predicates = [HasExtLSX] in { + +let mayLoad = 0, mayStore = 0 in { + +def VADD_B : LSX3R_VVV<0x700a0000>; +def VADD_H : LSX3R_VVV<0x700a8000>; +def VADD_W : LSX3R_VVV<0x700b0000>; +def VADD_D : LSX3R_VVV<0x700b8000>; +def VADD_Q : LSX3R_VVV<0x712d0000>; + +def VSUB_B : LSX3R_VVV<0x700c0000>; +def VSUB_H : LSX3R_VVV<0x700c8000>; +def VSUB_W : LSX3R_VVV<0x700d0000>; +def VSUB_D : LSX3R_VVV<0x700d8000>; +def VSUB_Q : LSX3R_VVV<0x712d8000>; + +def VADDI_BU : LSX2RI5_VVI<0x728a0000>; +def VADDI_HU : LSX2RI5_VVI<0x728a8000>; +def VADDI_WU : LSX2RI5_VVI<0x728b0000>; +def VADDI_DU : LSX2RI5_VVI<0x728b8000>; + +def VSUBI_BU : LSX2RI5_VVI<0x728c0000>; +def VSUBI_HU : LSX2RI5_VVI<0x728c8000>; +def VSUBI_WU : LSX2RI5_VVI<0x728d0000>; +def VSUBI_DU : LSX2RI5_VVI<0x728d8000>; + +def VNEG_B : LSX2R_VV<0x729c3000>; +def VNEG_H : LSX2R_VV<0x729c3400>; +def VNEG_W : LSX2R_VV<0x729c3800>; +def VNEG_D : LSX2R_VV<0x729c3c00>; + +def VSADD_B : LSX3R_VVV<0x70460000>; +def VSADD_H : LSX3R_VVV<0x70468000>; +def VSADD_W : LSX3R_VVV<0x70470000>; +def VSADD_D : LSX3R_VVV<0x70478000>; +def VSADD_BU : LSX3R_VVV<0x704a0000>; +def VSADD_HU : LSX3R_VVV<0x704a8000>; +def VSADD_WU : LSX3R_VVV<0x704b0000>; +def VSADD_DU : LSX3R_VVV<0x704b8000>; + +def VSSUB_B : LSX3R_VVV<0x70480000>; +def VSSUB_H : LSX3R_VVV<0x70488000>; +def VSSUB_W : LSX3R_VVV<0x70490000>; +def VSSUB_D : LSX3R_VVV<0x70498000>; +def VSSUB_BU : LSX3R_VVV<0x704c0000>; +def VSSUB_HU : LSX3R_VVV<0x704c8000>; +def VSSUB_WU : LSX3R_VVV<0x704d0000>; +def VSSUB_DU : LSX3R_VVV<0x704d8000>; + +def VHADDW_H_B : LSX3R_VVV<0x70540000>; +def VHADDW_W_H : LSX3R_VVV<0x70548000>; +def VHADDW_D_W : LSX3R_VVV<0x70550000>; +def 
VHADDW_Q_D : LSX3R_VVV<0x70558000>; +def VHADDW_HU_BU : LSX3R_VVV<0x70580000>; +def VHADDW_WU_HU : LSX3R_VVV<0x70588000>; +def VHADDW_DU_WU : LSX3R_VVV<0x70590000>; +def VHADDW_QU_DU : LSX3R_VVV<0x70598000>; + +def VHSUBW_H_B : LSX3R_VVV<0x70560000>; +def VHSUBW_W_H : LSX3R_VVV<0x70568000>; +def VHSUBW_D_W : LSX3R_VVV<0x70570000>; +def VHSUBW_Q_D : LSX3R_VVV<0x70578000>; +def VHSUBW_HU_BU : LSX3R_VVV<0x705a0000>; +def VHSUBW_WU_HU : LSX3R_VVV<0x705a8000>; +def VHSUBW_DU_WU : LSX3R_VVV<0x705b0000>; +def VHSUBW_QU_DU : LSX3R_VVV<0x705b8000>; + +def VADDWEV_H_B : LSX3R_VVV<0x701e0000>; +def VADDWEV_W_H : LSX3R_VVV<0x701e8000>; +def VADDWEV_D_W : LSX3R_VVV<0x701f0000>; +def VADDWEV_Q_D : LSX3R_VVV<0x701f8000>; +def VADDWOD_H_B : LSX3R_VVV<0x70220000>; +def VADDWOD_W_H : LSX3R_VVV<0x70228000>; +def VADDWOD_D_W : LSX3R_VVV<0x70230000>; +def VADDWOD_Q_D : LSX3R_VVV<0x70238000>; + +def VSUBWEV_H_B : LSX3R_VVV<0x70200000>; +def VSUBWEV_W_H : LSX3R_VVV<0x70208000>; +def VSUBWEV_D_W : LSX3R_VVV<0x70210000>; +def VSUBWEV_Q_D : LSX3R_VVV<0x70218000>; +def VSUBWOD_H_B : LSX3R_VVV<0x70240000>; +def VSUBWOD_W_H : LSX3R_VVV<0x70248000>; +def VSUBWOD_D_W : LSX3R_VVV<0x70250000>; +def VSUBWOD_Q_D : LSX3R_VVV<0x70258000>; + +def VADDWEV_H_BU : LSX3R_VVV<0x702e0000>; +def VADDWEV_W_HU : LSX3R_VVV<0x702e8000>; +def VADDWEV_D_WU : LSX3R_VVV<0x702f0000>; +def VADDWEV_Q_DU : LSX3R_VVV<0x702f8000>; +def VADDWOD_H_BU : LSX3R_VVV<0x70320000>; +def VADDWOD_W_HU : LSX3R_VVV<0x70328000>; +def VADDWOD_D_WU : LSX3R_VVV<0x70330000>; +def VADDWOD_Q_DU : LSX3R_VVV<0x70338000>; + +def VSUBWEV_H_BU : LSX3R_VVV<0x70300000>; +def VSUBWEV_W_HU : LSX3R_VVV<0x70308000>; +def VSUBWEV_D_WU : LSX3R_VVV<0x70310000>; +def VSUBWEV_Q_DU : LSX3R_VVV<0x70318000>; +def VSUBWOD_H_BU : LSX3R_VVV<0x70340000>; +def VSUBWOD_W_HU : LSX3R_VVV<0x70348000>; +def VSUBWOD_D_WU : LSX3R_VVV<0x70350000>; +def VSUBWOD_Q_DU : LSX3R_VVV<0x70358000>; + +def VADDWEV_H_BU_B : LSX3R_VVV<0x703e0000>; +def VADDWEV_W_HU_H : LSX3R_VVV<0x703e8000>; +def VADDWEV_D_WU_W : LSX3R_VVV<0x703f0000>; +def VADDWEV_Q_DU_D : LSX3R_VVV<0x703f8000>; +def VADDWOD_H_BU_B : LSX3R_VVV<0x70400000>; +def VADDWOD_W_HU_H : LSX3R_VVV<0x70408000>; +def VADDWOD_D_WU_W : LSX3R_VVV<0x70410000>; +def VADDWOD_Q_DU_D : LSX3R_VVV<0x70418000>; + +def VAVG_B : LSX3R_VVV<0x70640000>; +def VAVG_H : LSX3R_VVV<0x70648000>; +def VAVG_W : LSX3R_VVV<0x70650000>; +def VAVG_D : LSX3R_VVV<0x70658000>; +def VAVG_BU : LSX3R_VVV<0x70660000>; +def VAVG_HU : LSX3R_VVV<0x70668000>; +def VAVG_WU : LSX3R_VVV<0x70670000>; +def VAVG_DU : LSX3R_VVV<0x70678000>; +def VAVGR_B : LSX3R_VVV<0x70680000>; +def VAVGR_H : LSX3R_VVV<0x70688000>; +def VAVGR_W : LSX3R_VVV<0x70690000>; +def VAVGR_D : LSX3R_VVV<0x70698000>; +def VAVGR_BU : LSX3R_VVV<0x706a0000>; +def VAVGR_HU : LSX3R_VVV<0x706a8000>; +def VAVGR_WU : LSX3R_VVV<0x706b0000>; +def VAVGR_DU : LSX3R_VVV<0x706b8000>; + +def VABSD_B : LSX3R_VVV<0x70600000>; +def VABSD_H : LSX3R_VVV<0x70608000>; +def VABSD_W : LSX3R_VVV<0x70610000>; +def VABSD_D : LSX3R_VVV<0x70618000>; +def VABSD_BU : LSX3R_VVV<0x70620000>; +def VABSD_HU : LSX3R_VVV<0x70628000>; +def VABSD_WU : LSX3R_VVV<0x70630000>; +def VABSD_DU : LSX3R_VVV<0x70638000>; + +def VADDA_B : LSX3R_VVV<0x705c0000>; +def VADDA_H : LSX3R_VVV<0x705c8000>; +def VADDA_W : LSX3R_VVV<0x705d0000>; +def VADDA_D : LSX3R_VVV<0x705d8000>; + +def VMAX_B : LSX3R_VVV<0x70700000>; +def VMAX_H : LSX3R_VVV<0x70708000>; +def VMAX_W : LSX3R_VVV<0x70710000>; +def VMAX_D : LSX3R_VVV<0x70718000>; +def VMAXI_B : LSX2RI5_VVI<0x72900000, simm5>; +def 
VMAXI_H : LSX2RI5_VVI<0x72908000, simm5>; +def VMAXI_W : LSX2RI5_VVI<0x72910000, simm5>; +def VMAXI_D : LSX2RI5_VVI<0x72918000, simm5>; +def VMAX_BU : LSX3R_VVV<0x70740000>; +def VMAX_HU : LSX3R_VVV<0x70748000>; +def VMAX_WU : LSX3R_VVV<0x70750000>; +def VMAX_DU : LSX3R_VVV<0x70758000>; +def VMAXI_BU : LSX2RI5_VVI<0x72940000>; +def VMAXI_HU : LSX2RI5_VVI<0x72948000>; +def VMAXI_WU : LSX2RI5_VVI<0x72950000>; +def VMAXI_DU : LSX2RI5_VVI<0x72958000>; + +def VMIN_B : LSX3R_VVV<0x70720000>; +def VMIN_H : LSX3R_VVV<0x70728000>; +def VMIN_W : LSX3R_VVV<0x70730000>; +def VMIN_D : LSX3R_VVV<0x70738000>; +def VMINI_B : LSX2RI5_VVI<0x72920000, simm5>; +def VMINI_H : LSX2RI5_VVI<0x72928000, simm5>; +def VMINI_W : LSX2RI5_VVI<0x72930000, simm5>; +def VMINI_D : LSX2RI5_VVI<0x72938000, simm5>; +def VMIN_BU : LSX3R_VVV<0x70760000>; +def VMIN_HU : LSX3R_VVV<0x70768000>; +def VMIN_WU : LSX3R_VVV<0x70770000>; +def VMIN_DU : LSX3R_VVV<0x70778000>; +def VMINI_BU : LSX2RI5_VVI<0x72960000>; +def VMINI_HU : LSX2RI5_VVI<0x72968000>; +def VMINI_WU : LSX2RI5_VVI<0x72970000>; +def VMINI_DU : LSX2RI5_VVI<0x72978000>; + +def VMUL_B : LSX3R_VVV<0x70840000>; +def VMUL_H : LSX3R_VVV<0x70848000>; +def VMUL_W : LSX3R_VVV<0x70850000>; +def VMUL_D : LSX3R_VVV<0x70858000>; + +def VMUH_B : LSX3R_VVV<0x70860000>; +def VMUH_H : LSX3R_VVV<0x70868000>; +def VMUH_W : LSX3R_VVV<0x70870000>; +def VMUH_D : LSX3R_VVV<0x70878000>; +def VMUH_BU : LSX3R_VVV<0x70880000>; +def VMUH_HU : LSX3R_VVV<0x70888000>; +def VMUH_WU : LSX3R_VVV<0x70890000>; +def VMUH_DU : LSX3R_VVV<0x70898000>; + +def VMULWEV_H_B : LSX3R_VVV<0x70900000>; +def VMULWEV_W_H : LSX3R_VVV<0x70908000>; +def VMULWEV_D_W : LSX3R_VVV<0x70910000>; +def VMULWEV_Q_D : LSX3R_VVV<0x70918000>; +def VMULWOD_H_B : LSX3R_VVV<0x70920000>; +def VMULWOD_W_H : LSX3R_VVV<0x70928000>; +def VMULWOD_D_W : LSX3R_VVV<0x70930000>; +def VMULWOD_Q_D : LSX3R_VVV<0x70938000>; +def VMULWEV_H_BU : LSX3R_VVV<0x70980000>; +def VMULWEV_W_HU : LSX3R_VVV<0x70988000>; +def VMULWEV_D_WU : LSX3R_VVV<0x70990000>; +def VMULWEV_Q_DU : LSX3R_VVV<0x70998000>; +def VMULWOD_H_BU : LSX3R_VVV<0x709a0000>; +def VMULWOD_W_HU : LSX3R_VVV<0x709a8000>; +def VMULWOD_D_WU : LSX3R_VVV<0x709b0000>; +def VMULWOD_Q_DU : LSX3R_VVV<0x709b8000>; +def VMULWEV_H_BU_B : LSX3R_VVV<0x70a00000>; +def VMULWEV_W_HU_H : LSX3R_VVV<0x70a08000>; +def VMULWEV_D_WU_W : LSX3R_VVV<0x70a10000>; +def VMULWEV_Q_DU_D : LSX3R_VVV<0x70a18000>; +def VMULWOD_H_BU_B : LSX3R_VVV<0x70a20000>; +def VMULWOD_W_HU_H : LSX3R_VVV<0x70a28000>; +def VMULWOD_D_WU_W : LSX3R_VVV<0x70a30000>; +def VMULWOD_Q_DU_D : LSX3R_VVV<0x70a38000>; + +def VMADD_B : LSX3R_VVVV<0x70a80000>; +def VMADD_H : LSX3R_VVVV<0x70a88000>; +def VMADD_W : LSX3R_VVVV<0x70a90000>; +def VMADD_D : LSX3R_VVVV<0x70a98000>; + +def VMSUB_B : LSX3R_VVVV<0x70aa0000>; +def VMSUB_H : LSX3R_VVVV<0x70aa8000>; +def VMSUB_W : LSX3R_VVVV<0x70ab0000>; +def VMSUB_D : LSX3R_VVVV<0x70ab8000>; + +def VMADDWEV_H_B : LSX3R_VVVV<0x70ac0000>; +def VMADDWEV_W_H : LSX3R_VVVV<0x70ac8000>; +def VMADDWEV_D_W : LSX3R_VVVV<0x70ad0000>; +def VMADDWEV_Q_D : LSX3R_VVVV<0x70ad8000>; +def VMADDWOD_H_B : LSX3R_VVVV<0x70ae0000>; +def VMADDWOD_W_H : LSX3R_VVVV<0x70ae8000>; +def VMADDWOD_D_W : LSX3R_VVVV<0x70af0000>; +def VMADDWOD_Q_D : LSX3R_VVVV<0x70af8000>; +def VMADDWEV_H_BU : LSX3R_VVVV<0x70b40000>; +def VMADDWEV_W_HU : LSX3R_VVVV<0x70b48000>; +def VMADDWEV_D_WU : LSX3R_VVVV<0x70b50000>; +def VMADDWEV_Q_DU : LSX3R_VVVV<0x70b58000>; +def VMADDWOD_H_BU : LSX3R_VVVV<0x70b60000>; +def VMADDWOD_W_HU : LSX3R_VVVV<0x70b68000>; +def 
VMADDWOD_D_WU : LSX3R_VVVV<0x70b70000>; +def VMADDWOD_Q_DU : LSX3R_VVVV<0x70b78000>; +def VMADDWEV_H_BU_B : LSX3R_VVVV<0x70bc0000>; +def VMADDWEV_W_HU_H : LSX3R_VVVV<0x70bc8000>; +def VMADDWEV_D_WU_W : LSX3R_VVVV<0x70bd0000>; +def VMADDWEV_Q_DU_D : LSX3R_VVVV<0x70bd8000>; +def VMADDWOD_H_BU_B : LSX3R_VVVV<0x70be0000>; +def VMADDWOD_W_HU_H : LSX3R_VVVV<0x70be8000>; +def VMADDWOD_D_WU_W : LSX3R_VVVV<0x70bf0000>; +def VMADDWOD_Q_DU_D : LSX3R_VVVV<0x70bf8000>; + +def VDIV_B : LSX3R_VVV<0x70e00000>; +def VDIV_H : LSX3R_VVV<0x70e08000>; +def VDIV_W : LSX3R_VVV<0x70e10000>; +def VDIV_D : LSX3R_VVV<0x70e18000>; +def VDIV_BU : LSX3R_VVV<0x70e40000>; +def VDIV_HU : LSX3R_VVV<0x70e48000>; +def VDIV_WU : LSX3R_VVV<0x70e50000>; +def VDIV_DU : LSX3R_VVV<0x70e58000>; + +def VMOD_B : LSX3R_VVV<0x70e20000>; +def VMOD_H : LSX3R_VVV<0x70e28000>; +def VMOD_W : LSX3R_VVV<0x70e30000>; +def VMOD_D : LSX3R_VVV<0x70e38000>; +def VMOD_BU : LSX3R_VVV<0x70e60000>; +def VMOD_HU : LSX3R_VVV<0x70e68000>; +def VMOD_WU : LSX3R_VVV<0x70e70000>; +def VMOD_DU : LSX3R_VVV<0x70e78000>; + +def VSAT_B : LSX2RI3_VVI<0x73242000>; +def VSAT_H : LSX2RI4_VVI<0x73244000>; +def VSAT_W : LSX2RI5_VVI<0x73248000>; +def VSAT_D : LSX2RI6_VVI<0x73250000>; +def VSAT_BU : LSX2RI3_VVI<0x73282000>; +def VSAT_HU : LSX2RI4_VVI<0x73284000>; +def VSAT_WU : LSX2RI5_VVI<0x73288000>; +def VSAT_DU : LSX2RI6_VVI<0x73290000>; + +def VEXTH_H_B : LSX2R_VV<0x729ee000>; +def VEXTH_W_H : LSX2R_VV<0x729ee400>; +def VEXTH_D_W : LSX2R_VV<0x729ee800>; +def VEXTH_Q_D : LSX2R_VV<0x729eec00>; +def VEXTH_HU_BU : LSX2R_VV<0x729ef000>; +def VEXTH_WU_HU : LSX2R_VV<0x729ef400>; +def VEXTH_DU_WU : LSX2R_VV<0x729ef800>; +def VEXTH_QU_DU : LSX2R_VV<0x729efc00>; + +def VSIGNCOV_B : LSX3R_VVV<0x712e0000>; +def VSIGNCOV_H : LSX3R_VVV<0x712e8000>; +def VSIGNCOV_W : LSX3R_VVV<0x712f0000>; +def VSIGNCOV_D : LSX3R_VVV<0x712f8000>; + +def VMSKLTZ_B : LSX2R_VV<0x729c4000>; +def VMSKLTZ_H : LSX2R_VV<0x729c4400>; +def VMSKLTZ_W : LSX2R_VV<0x729c4800>; +def VMSKLTZ_D : LSX2R_VV<0x729c4c00>; + +def VMSKGEZ_B : LSX2R_VV<0x729c5000>; + +def VMSKNZ_B : LSX2R_VV<0x729c6000>; + +def VLDI : LSX1RI13_VI<0x73e00000>; + +def VAND_V : LSX3R_VVV<0x71260000>; +def VOR_V : LSX3R_VVV<0x71268000>; +def VXOR_V : LSX3R_VVV<0x71270000>; +def VNOR_V : LSX3R_VVV<0x71278000>; +def VANDN_V : LSX3R_VVV<0x71280000>; +def VORN_V : LSX3R_VVV<0x71288000>; + +def VANDI_B : LSX2RI8_VVI<0x73d00000>; +def VORI_B : LSX2RI8_VVI<0x73d40000>; +def VXORI_B : LSX2RI8_VVI<0x73d80000>; +def VNORI_B : LSX2RI8_VVI<0x73dc0000>; + +def VSLL_B : LSX3R_VVV<0x70e80000>; +def VSLL_H : LSX3R_VVV<0x70e88000>; +def VSLL_W : LSX3R_VVV<0x70e90000>; +def VSLL_D : LSX3R_VVV<0x70e98000>; +def VSLLI_B : LSX2RI3_VVI<0x732c2000>; +def VSLLI_H : LSX2RI4_VVI<0x732c4000>; +def VSLLI_W : LSX2RI5_VVI<0x732c8000>; +def VSLLI_D : LSX2RI6_VVI<0x732d0000>; + +def VSRL_B : LSX3R_VVV<0x70ea0000>; +def VSRL_H : LSX3R_VVV<0x70ea8000>; +def VSRL_W : LSX3R_VVV<0x70eb0000>; +def VSRL_D : LSX3R_VVV<0x70eb8000>; +def VSRLI_B : LSX2RI3_VVI<0x73302000>; +def VSRLI_H : LSX2RI4_VVI<0x73304000>; +def VSRLI_W : LSX2RI5_VVI<0x73308000>; +def VSRLI_D : LSX2RI6_VVI<0x73310000>; + +def VSRA_B : LSX3R_VVV<0x70ec0000>; +def VSRA_H : LSX3R_VVV<0x70ec8000>; +def VSRA_W : LSX3R_VVV<0x70ed0000>; +def VSRA_D : LSX3R_VVV<0x70ed8000>; +def VSRAI_B : LSX2RI3_VVI<0x73342000>; +def VSRAI_H : LSX2RI4_VVI<0x73344000>; +def VSRAI_W : LSX2RI5_VVI<0x73348000>; +def VSRAI_D : LSX2RI6_VVI<0x73350000>; + +def VROTR_B : LSX3R_VVV<0x70ee0000>; +def VROTR_H : LSX3R_VVV<0x70ee8000>; +def VROTR_W 
: LSX3R_VVV<0x70ef0000>; +def VROTR_D : LSX3R_VVV<0x70ef8000>; +def VROTRI_B : LSX2RI3_VVI<0x72a02000>; +def VROTRI_H : LSX2RI4_VVI<0x72a04000>; +def VROTRI_W : LSX2RI5_VVI<0x72a08000>; +def VROTRI_D : LSX2RI6_VVI<0x72a10000>; + +def VSLLWIL_H_B : LSX2RI3_VVI<0x73082000>; +def VSLLWIL_W_H : LSX2RI4_VVI<0x73084000>; +def VSLLWIL_D_W : LSX2RI5_VVI<0x73088000>; +def VEXTL_Q_D : LSX2R_VV<0x73090000>; +def VSLLWIL_HU_BU : LSX2RI3_VVI<0x730c2000>; +def VSLLWIL_WU_HU : LSX2RI4_VVI<0x730c4000>; +def VSLLWIL_DU_WU : LSX2RI5_VVI<0x730c8000>; +def VEXTL_QU_DU : LSX2R_VV<0x730d0000>; + +def VSRLR_B : LSX3R_VVV<0x70f00000>; +def VSRLR_H : LSX3R_VVV<0x70f08000>; +def VSRLR_W : LSX3R_VVV<0x70f10000>; +def VSRLR_D : LSX3R_VVV<0x70f18000>; +def VSRLRI_B : LSX2RI3_VVI<0x72a42000>; +def VSRLRI_H : LSX2RI4_VVI<0x72a44000>; +def VSRLRI_W : LSX2RI5_VVI<0x72a48000>; +def VSRLRI_D : LSX2RI6_VVI<0x72a50000>; + +def VSRAR_B : LSX3R_VVV<0x70f20000>; +def VSRAR_H : LSX3R_VVV<0x70f28000>; +def VSRAR_W : LSX3R_VVV<0x70f30000>; +def VSRAR_D : LSX3R_VVV<0x70f38000>; +def VSRARI_B : LSX2RI3_VVI<0x72a82000>; +def VSRARI_H : LSX2RI4_VVI<0x72a84000>; +def VSRARI_W : LSX2RI5_VVI<0x72a88000>; +def VSRARI_D : LSX2RI6_VVI<0x72a90000>; + +def VSRLN_B_H : LSX3R_VVV<0x70f48000>; +def VSRLN_H_W : LSX3R_VVV<0x70f50000>; +def VSRLN_W_D : LSX3R_VVV<0x70f58000>; +def VSRAN_B_H : LSX3R_VVV<0x70f68000>; +def VSRAN_H_W : LSX3R_VVV<0x70f70000>; +def VSRAN_W_D : LSX3R_VVV<0x70f78000>; + +def VSRLNI_B_H : LSX2RI4_VVVI<0x73404000>; +def VSRLNI_H_W : LSX2RI5_VVVI<0x73408000>; +def VSRLNI_W_D : LSX2RI6_VVVI<0x73410000>; +def VSRLNI_D_Q : LSX2RI7_VVVI<0x73420000>; +def VSRANI_B_H : LSX2RI4_VVVI<0x73584000>; +def VSRANI_H_W : LSX2RI5_VVVI<0x73588000>; +def VSRANI_W_D : LSX2RI6_VVVI<0x73590000>; +def VSRANI_D_Q : LSX2RI7_VVVI<0x735a0000>; + +def VSRLRN_B_H : LSX3R_VVV<0x70f88000>; +def VSRLRN_H_W : LSX3R_VVV<0x70f90000>; +def VSRLRN_W_D : LSX3R_VVV<0x70f98000>; +def VSRARN_B_H : LSX3R_VVV<0x70fa8000>; +def VSRARN_H_W : LSX3R_VVV<0x70fb0000>; +def VSRARN_W_D : LSX3R_VVV<0x70fb8000>; + +def VSRLRNI_B_H : LSX2RI4_VVVI<0x73444000>; +def VSRLRNI_H_W : LSX2RI5_VVVI<0x73448000>; +def VSRLRNI_W_D : LSX2RI6_VVVI<0x73450000>; +def VSRLRNI_D_Q : LSX2RI7_VVVI<0x73460000>; +def VSRARNI_B_H : LSX2RI4_VVVI<0x735c4000>; +def VSRARNI_H_W : LSX2RI5_VVVI<0x735c8000>; +def VSRARNI_W_D : LSX2RI6_VVVI<0x735d0000>; +def VSRARNI_D_Q : LSX2RI7_VVVI<0x735e0000>; + +def VSSRLN_B_H : LSX3R_VVV<0x70fc8000>; +def VSSRLN_H_W : LSX3R_VVV<0x70fd0000>; +def VSSRLN_W_D : LSX3R_VVV<0x70fd8000>; +def VSSRAN_B_H : LSX3R_VVV<0x70fe8000>; +def VSSRAN_H_W : LSX3R_VVV<0x70ff0000>; +def VSSRAN_W_D : LSX3R_VVV<0x70ff8000>; +def VSSRLN_BU_H : LSX3R_VVV<0x71048000>; +def VSSRLN_HU_W : LSX3R_VVV<0x71050000>; +def VSSRLN_WU_D : LSX3R_VVV<0x71058000>; +def VSSRAN_BU_H : LSX3R_VVV<0x71068000>; +def VSSRAN_HU_W : LSX3R_VVV<0x71070000>; +def VSSRAN_WU_D : LSX3R_VVV<0x71078000>; + +def VSSRLNI_B_H : LSX2RI4_VVVI<0x73484000>; +def VSSRLNI_H_W : LSX2RI5_VVVI<0x73488000>; +def VSSRLNI_W_D : LSX2RI6_VVVI<0x73490000>; +def VSSRLNI_D_Q : LSX2RI7_VVVI<0x734a0000>; +def VSSRANI_B_H : LSX2RI4_VVVI<0x73604000>; +def VSSRANI_H_W : LSX2RI5_VVVI<0x73608000>; +def VSSRANI_W_D : LSX2RI6_VVVI<0x73610000>; +def VSSRANI_D_Q : LSX2RI7_VVVI<0x73620000>; +def VSSRLNI_BU_H : LSX2RI4_VVVI<0x734c4000>; +def VSSRLNI_HU_W : LSX2RI5_VVVI<0x734c8000>; +def VSSRLNI_WU_D : LSX2RI6_VVVI<0x734d0000>; +def VSSRLNI_DU_Q : LSX2RI7_VVVI<0x734e0000>; +def VSSRANI_BU_H : LSX2RI4_VVVI<0x73644000>; +def VSSRANI_HU_W : 
LSX2RI5_VVVI<0x73648000>; +def VSSRANI_WU_D : LSX2RI6_VVVI<0x73650000>; +def VSSRANI_DU_Q : LSX2RI7_VVVI<0x73660000>; + +def VSSRLRN_B_H : LSX3R_VVV<0x71008000>; +def VSSRLRN_H_W : LSX3R_VVV<0x71010000>; +def VSSRLRN_W_D : LSX3R_VVV<0x71018000>; +def VSSRARN_B_H : LSX3R_VVV<0x71028000>; +def VSSRARN_H_W : LSX3R_VVV<0x71030000>; +def VSSRARN_W_D : LSX3R_VVV<0x71038000>; +def VSSRLRN_BU_H : LSX3R_VVV<0x71088000>; +def VSSRLRN_HU_W : LSX3R_VVV<0x71090000>; +def VSSRLRN_WU_D : LSX3R_VVV<0x71098000>; +def VSSRARN_BU_H : LSX3R_VVV<0x710a8000>; +def VSSRARN_HU_W : LSX3R_VVV<0x710b0000>; +def VSSRARN_WU_D : LSX3R_VVV<0x710b8000>; + +def VSSRLRNI_B_H : LSX2RI4_VVVI<0x73504000>; +def VSSRLRNI_H_W : LSX2RI5_VVVI<0x73508000>; +def VSSRLRNI_W_D : LSX2RI6_VVVI<0x73510000>; +def VSSRLRNI_D_Q : LSX2RI7_VVVI<0x73520000>; +def VSSRARNI_B_H : LSX2RI4_VVVI<0x73684000>; +def VSSRARNI_H_W : LSX2RI5_VVVI<0x73688000>; +def VSSRARNI_W_D : LSX2RI6_VVVI<0x73690000>; +def VSSRARNI_D_Q : LSX2RI7_VVVI<0x736a0000>; +def VSSRLRNI_BU_H : LSX2RI4_VVVI<0x73544000>; +def VSSRLRNI_HU_W : LSX2RI5_VVVI<0x73548000>; +def VSSRLRNI_WU_D : LSX2RI6_VVVI<0x73550000>; +def VSSRLRNI_DU_Q : LSX2RI7_VVVI<0x73560000>; +def VSSRARNI_BU_H : LSX2RI4_VVVI<0x736c4000>; +def VSSRARNI_HU_W : LSX2RI5_VVVI<0x736c8000>; +def VSSRARNI_WU_D : LSX2RI6_VVVI<0x736d0000>; +def VSSRARNI_DU_Q : LSX2RI7_VVVI<0x736e0000>; + +def VCLO_B : LSX2R_VV<0x729c0000>; +def VCLO_H : LSX2R_VV<0x729c0400>; +def VCLO_W : LSX2R_VV<0x729c0800>; +def VCLO_D : LSX2R_VV<0x729c0c00>; +def VCLZ_B : LSX2R_VV<0x729c1000>; +def VCLZ_H : LSX2R_VV<0x729c1400>; +def VCLZ_W : LSX2R_VV<0x729c1800>; +def VCLZ_D : LSX2R_VV<0x729c1c00>; + +def VPCNT_B : LSX2R_VV<0x729c2000>; +def VPCNT_H : LSX2R_VV<0x729c2400>; +def VPCNT_W : LSX2R_VV<0x729c2800>; +def VPCNT_D : LSX2R_VV<0x729c2c00>; + +def VBITCLR_B : LSX3R_VVV<0x710c0000>; +def VBITCLR_H : LSX3R_VVV<0x710c8000>; +def VBITCLR_W : LSX3R_VVV<0x710d0000>; +def VBITCLR_D : LSX3R_VVV<0x710d8000>; +def VBITCLRI_B : LSX2RI3_VVI<0x73102000>; +def VBITCLRI_H : LSX2RI4_VVI<0x73104000>; +def VBITCLRI_W : LSX2RI5_VVI<0x73108000>; +def VBITCLRI_D : LSX2RI6_VVI<0x73110000>; + +def VBITSET_B : LSX3R_VVV<0x710e0000>; +def VBITSET_H : LSX3R_VVV<0x710e8000>; +def VBITSET_W : LSX3R_VVV<0x710f0000>; +def VBITSET_D : LSX3R_VVV<0x710f8000>; +def VBITSETI_B : LSX2RI3_VVI<0x73142000>; +def VBITSETI_H : LSX2RI4_VVI<0x73144000>; +def VBITSETI_W : LSX2RI5_VVI<0x73148000>; +def VBITSETI_D : LSX2RI6_VVI<0x73150000>; + +def VBITREV_B : LSX3R_VVV<0x71100000>; +def VBITREV_H : LSX3R_VVV<0x71108000>; +def VBITREV_W : LSX3R_VVV<0x71110000>; +def VBITREV_D : LSX3R_VVV<0x71118000>; +def VBITREVI_B : LSX2RI3_VVI<0x73182000>; +def VBITREVI_H : LSX2RI4_VVI<0x73184000>; +def VBITREVI_W : LSX2RI5_VVI<0x73188000>; +def VBITREVI_D : LSX2RI6_VVI<0x73190000>; + +def VFRSTP_B : LSX3R_VVVV<0x712b0000>; +def VFRSTP_H : LSX3R_VVVV<0x712b8000>; +def VFRSTPI_B : LSX2RI5_VVVI<0x729a0000>; +def VFRSTPI_H : LSX2RI5_VVVI<0x729a8000>; + +def VFADD_S : LSX3R_VVV<0x71308000>; +def VFADD_D : LSX3R_VVV<0x71310000>; +def VFSUB_S : LSX3R_VVV<0x71328000>; +def VFSUB_D : LSX3R_VVV<0x71330000>; +def VFMUL_S : LSX3R_VVV<0x71388000>; +def VFMUL_D : LSX3R_VVV<0x71390000>; +def VFDIV_S : LSX3R_VVV<0x713a8000>; +def VFDIV_D : LSX3R_VVV<0x713b0000>; + +def VFMADD_S : LSX4R_VVVV<0x09100000>; +def VFMADD_D : LSX4R_VVVV<0x09200000>; +def VFMSUB_S : LSX4R_VVVV<0x09500000>; +def VFMSUB_D : LSX4R_VVVV<0x09600000>; +def VFNMADD_S : LSX4R_VVVV<0x09900000>; +def VFNMADD_D : LSX4R_VVVV<0x09a00000>; +def VFNMSUB_S : 
LSX4R_VVVV<0x09d00000>; +def VFNMSUB_D : LSX4R_VVVV<0x09e00000>; + +def VFMAX_S : LSX3R_VVV<0x713c8000>; +def VFMAX_D : LSX3R_VVV<0x713d0000>; +def VFMIN_S : LSX3R_VVV<0x713e8000>; +def VFMIN_D : LSX3R_VVV<0x713f0000>; + +def VFMAXA_S : LSX3R_VVV<0x71408000>; +def VFMAXA_D : LSX3R_VVV<0x71410000>; +def VFMINA_S : LSX3R_VVV<0x71428000>; +def VFMINA_D : LSX3R_VVV<0x71430000>; + +def VFLOGB_S : LSX2R_VV<0x729cc400>; +def VFLOGB_D : LSX2R_VV<0x729cc800>; + +def VFCLASS_S : LSX2R_VV<0x729cd400>; +def VFCLASS_D : LSX2R_VV<0x729cd800>; + +def VFSQRT_S : LSX2R_VV<0x729ce400>; +def VFSQRT_D : LSX2R_VV<0x729ce800>; +def VFRECIP_S : LSX2R_VV<0x729cf400>; +def VFRECIP_D : LSX2R_VV<0x729cf800>; +def VFRSQRT_S : LSX2R_VV<0x729d0400>; +def VFRSQRT_D : LSX2R_VV<0x729d0800>; + +def VFCVTL_S_H : LSX2R_VV<0x729de800>; +def VFCVTH_S_H : LSX2R_VV<0x729dec00>; +def VFCVTL_D_S : LSX2R_VV<0x729df000>; +def VFCVTH_D_S : LSX2R_VV<0x729df400>; +def VFCVT_H_S : LSX3R_VVV<0x71460000>; +def VFCVT_S_D : LSX3R_VVV<0x71468000>; + +def VFRINTRNE_S : LSX2R_VV<0x729d7400>; +def VFRINTRNE_D : LSX2R_VV<0x729d7800>; +def VFRINTRZ_S : LSX2R_VV<0x729d6400>; +def VFRINTRZ_D : LSX2R_VV<0x729d6800>; +def VFRINTRP_S : LSX2R_VV<0x729d5400>; +def VFRINTRP_D : LSX2R_VV<0x729d5800>; +def VFRINTRM_S : LSX2R_VV<0x729d4400>; +def VFRINTRM_D : LSX2R_VV<0x729d4800>; +def VFRINT_S : LSX2R_VV<0x729d3400>; +def VFRINT_D : LSX2R_VV<0x729d3800>; + +def VFTINTRNE_W_S : LSX2R_VV<0x729e5000>; +def VFTINTRNE_L_D : LSX2R_VV<0x729e5400>; +def VFTINTRZ_W_S : LSX2R_VV<0x729e4800>; +def VFTINTRZ_L_D : LSX2R_VV<0x729e4c00>; +def VFTINTRP_W_S : LSX2R_VV<0x729e4000>; +def VFTINTRP_L_D : LSX2R_VV<0x729e4400>; +def VFTINTRM_W_S : LSX2R_VV<0x729e3800>; +def VFTINTRM_L_D : LSX2R_VV<0x729e3c00>; +def VFTINT_W_S : LSX2R_VV<0x729e3000>; +def VFTINT_L_D : LSX2R_VV<0x729e3400>; +def VFTINTRZ_WU_S : LSX2R_VV<0x729e7000>; +def VFTINTRZ_LU_D : LSX2R_VV<0x729e7400>; +def VFTINT_WU_S : LSX2R_VV<0x729e5800>; +def VFTINT_LU_D : LSX2R_VV<0x729e5c00>; + +def VFTINTRNE_W_D : LSX3R_VVV<0x714b8000>; +def VFTINTRZ_W_D : LSX3R_VVV<0x714b0000>; +def VFTINTRP_W_D : LSX3R_VVV<0x714a8000>; +def VFTINTRM_W_D : LSX3R_VVV<0x714a0000>; +def VFTINT_W_D : LSX3R_VVV<0x71498000>; + +def VFTINTRNEL_L_S : LSX2R_VV<0x729ea000>; +def VFTINTRNEH_L_S : LSX2R_VV<0x729ea400>; +def VFTINTRZL_L_S : LSX2R_VV<0x729e9800>; +def VFTINTRZH_L_S : LSX2R_VV<0x729e9c00>; +def VFTINTRPL_L_S : LSX2R_VV<0x729e9000>; +def VFTINTRPH_L_S : LSX2R_VV<0x729e9400>; +def VFTINTRML_L_S : LSX2R_VV<0x729e8800>; +def VFTINTRMH_L_S : LSX2R_VV<0x729e8c00>; +def VFTINTL_L_S : LSX2R_VV<0x729e8000>; +def VFTINTH_L_S : LSX2R_VV<0x729e8400>; + +def VFFINT_S_W : LSX2R_VV<0x729e0000>; +def VFFINT_D_L : LSX2R_VV<0x729e0800>; +def VFFINT_S_WU : LSX2R_VV<0x729e0400>; +def VFFINT_D_LU : LSX2R_VV<0x729e0c00>; +def VFFINTL_D_W : LSX2R_VV<0x729e1000>; +def VFFINTH_D_W : LSX2R_VV<0x729e1400>; +def VFFINT_S_L : LSX3R_VVV<0x71480000>; + +def VSEQ_B : LSX3R_VVV<0x70000000>; +def VSEQ_H : LSX3R_VVV<0x70008000>; +def VSEQ_W : LSX3R_VVV<0x70010000>; +def VSEQ_D : LSX3R_VVV<0x70018000>; +def VSEQI_B : LSX2RI5_VVI<0x72800000, simm5>; +def VSEQI_H : LSX2RI5_VVI<0x72808000, simm5>; +def VSEQI_W : LSX2RI5_VVI<0x72810000, simm5>; +def VSEQI_D : LSX2RI5_VVI<0x72818000, simm5>; + +def VSLE_B : LSX3R_VVV<0x70020000>; +def VSLE_H : LSX3R_VVV<0x70028000>; +def VSLE_W : LSX3R_VVV<0x70030000>; +def VSLE_D : LSX3R_VVV<0x70038000>; +def VSLEI_B : LSX2RI5_VVI<0x72820000, simm5>; +def VSLEI_H : LSX2RI5_VVI<0x72828000, simm5>; +def VSLEI_W : LSX2RI5_VVI<0x72830000, 
simm5>; +def VSLEI_D : LSX2RI5_VVI<0x72838000, simm5>; + +def VSLE_BU : LSX3R_VVV<0x70040000>; +def VSLE_HU : LSX3R_VVV<0x70048000>; +def VSLE_WU : LSX3R_VVV<0x70050000>; +def VSLE_DU : LSX3R_VVV<0x70058000>; +def VSLEI_BU : LSX2RI5_VVI<0x72840000>; +def VSLEI_HU : LSX2RI5_VVI<0x72848000>; +def VSLEI_WU : LSX2RI5_VVI<0x72850000>; +def VSLEI_DU : LSX2RI5_VVI<0x72858000>; + +def VSLT_B : LSX3R_VVV<0x70060000>; +def VSLT_H : LSX3R_VVV<0x70068000>; +def VSLT_W : LSX3R_VVV<0x70070000>; +def VSLT_D : LSX3R_VVV<0x70078000>; +def VSLTI_B : LSX2RI5_VVI<0x72860000, simm5>; +def VSLTI_H : LSX2RI5_VVI<0x72868000, simm5>; +def VSLTI_W : LSX2RI5_VVI<0x72870000, simm5>; +def VSLTI_D : LSX2RI5_VVI<0x72878000, simm5>; + +def VSLT_BU : LSX3R_VVV<0x70080000>; +def VSLT_HU : LSX3R_VVV<0x70088000>; +def VSLT_WU : LSX3R_VVV<0x70090000>; +def VSLT_DU : LSX3R_VVV<0x70098000>; +def VSLTI_BU : LSX2RI5_VVI<0x72880000>; +def VSLTI_HU : LSX2RI5_VVI<0x72888000>; +def VSLTI_WU : LSX2RI5_VVI<0x72890000>; +def VSLTI_DU : LSX2RI5_VVI<0x72898000>; + +def VFCMP_CAF_S : LSX3R_VVV<0x0c500000>; +def VFCMP_SAF_S : LSX3R_VVV<0x0c508000>; +def VFCMP_CLT_S : LSX3R_VVV<0x0c510000>; +def VFCMP_SLT_S : LSX3R_VVV<0x0c518000>; +def VFCMP_CEQ_S : LSX3R_VVV<0x0c520000>; +def VFCMP_SEQ_S : LSX3R_VVV<0x0c528000>; +def VFCMP_CLE_S : LSX3R_VVV<0x0c530000>; +def VFCMP_SLE_S : LSX3R_VVV<0x0c538000>; +def VFCMP_CUN_S : LSX3R_VVV<0x0c540000>; +def VFCMP_SUN_S : LSX3R_VVV<0x0c548000>; +def VFCMP_CULT_S : LSX3R_VVV<0x0c550000>; +def VFCMP_SULT_S : LSX3R_VVV<0x0c558000>; +def VFCMP_CUEQ_S : LSX3R_VVV<0x0c560000>; +def VFCMP_SUEQ_S : LSX3R_VVV<0x0c568000>; +def VFCMP_CULE_S : LSX3R_VVV<0x0c570000>; +def VFCMP_SULE_S : LSX3R_VVV<0x0c578000>; +def VFCMP_CNE_S : LSX3R_VVV<0x0c580000>; +def VFCMP_SNE_S : LSX3R_VVV<0x0c588000>; +def VFCMP_COR_S : LSX3R_VVV<0x0c5a0000>; +def VFCMP_SOR_S : LSX3R_VVV<0x0c5a8000>; +def VFCMP_CUNE_S : LSX3R_VVV<0x0c5c0000>; +def VFCMP_SUNE_S : LSX3R_VVV<0x0c5c8000>; + +def VFCMP_CAF_D : LSX3R_VVV<0x0c600000>; +def VFCMP_SAF_D : LSX3R_VVV<0x0c608000>; +def VFCMP_CLT_D : LSX3R_VVV<0x0c610000>; +def VFCMP_SLT_D : LSX3R_VVV<0x0c618000>; +def VFCMP_CEQ_D : LSX3R_VVV<0x0c620000>; +def VFCMP_SEQ_D : LSX3R_VVV<0x0c628000>; +def VFCMP_CLE_D : LSX3R_VVV<0x0c630000>; +def VFCMP_SLE_D : LSX3R_VVV<0x0c638000>; +def VFCMP_CUN_D : LSX3R_VVV<0x0c640000>; +def VFCMP_SUN_D : LSX3R_VVV<0x0c648000>; +def VFCMP_CULT_D : LSX3R_VVV<0x0c650000>; +def VFCMP_SULT_D : LSX3R_VVV<0x0c658000>; +def VFCMP_CUEQ_D : LSX3R_VVV<0x0c660000>; +def VFCMP_SUEQ_D : LSX3R_VVV<0x0c668000>; +def VFCMP_CULE_D : LSX3R_VVV<0x0c670000>; +def VFCMP_SULE_D : LSX3R_VVV<0x0c678000>; +def VFCMP_CNE_D : LSX3R_VVV<0x0c680000>; +def VFCMP_SNE_D : LSX3R_VVV<0x0c688000>; +def VFCMP_COR_D : LSX3R_VVV<0x0c6a0000>; +def VFCMP_SOR_D : LSX3R_VVV<0x0c6a8000>; +def VFCMP_CUNE_D : LSX3R_VVV<0x0c6c0000>; +def VFCMP_SUNE_D : LSX3R_VVV<0x0c6c8000>; + +def VBITSEL_V : LSX4R_VVVV<0x0d100000>; + +def VBITSELI_B : LSX2RI8_VVVI<0x73c40000>; + +def VSETEQZ_V : LSX2R_CV<0x729c9800>; +def VSETNEZ_V : LSX2R_CV<0x729c9c00>; +def VSETANYEQZ_B : LSX2R_CV<0x729ca000>; +def VSETANYEQZ_H : LSX2R_CV<0x729ca400>; +def VSETANYEQZ_W : LSX2R_CV<0x729ca800>; +def VSETANYEQZ_D : LSX2R_CV<0x729cac00>; +def VSETALLNEZ_B : LSX2R_CV<0x729cb000>; +def VSETALLNEZ_H : LSX2R_CV<0x729cb400>; +def VSETALLNEZ_W : LSX2R_CV<0x729cb800>; +def VSETALLNEZ_D : LSX2R_CV<0x729cbc00>; + +def VINSGR2VR_B : LSX2RI4_VVRI<0x72eb8000>; +def VINSGR2VR_H : LSX2RI3_VVRI<0x72ebc000>; +def VINSGR2VR_W : LSX2RI2_VVRI<0x72ebe000>; +def 
VINSGR2VR_D : LSX2RI1_VVRI<0x72ebf000>; +def VPICKVE2GR_B : LSX2RI4_RVI<0x72ef8000>; +def VPICKVE2GR_H : LSX2RI3_RVI<0x72efc000>; +def VPICKVE2GR_W : LSX2RI2_RVI<0x72efe000>; +def VPICKVE2GR_D : LSX2RI1_RVI<0x72eff000>; +def VPICKVE2GR_BU : LSX2RI4_RVI<0x72f38000>; +def VPICKVE2GR_HU : LSX2RI3_RVI<0x72f3c000>; +def VPICKVE2GR_WU : LSX2RI2_RVI<0x72f3e000>; +def VPICKVE2GR_DU : LSX2RI1_RVI<0x72f3f000>; + +def VREPLGR2VR_B : LSX2R_VR<0x729f0000>; +def VREPLGR2VR_H : LSX2R_VR<0x729f0400>; +def VREPLGR2VR_W : LSX2R_VR<0x729f0800>; +def VREPLGR2VR_D : LSX2R_VR<0x729f0c00>; + +def VREPLVE_B : LSX3R_VVR<0x71220000>; +def VREPLVE_H : LSX3R_VVR<0x71228000>; +def VREPLVE_W : LSX3R_VVR<0x71230000>; +def VREPLVE_D : LSX3R_VVR<0x71238000>; +def VREPLVEI_B : LSX2RI4_VVI<0x72f78000>; +def VREPLVEI_H : LSX2RI3_VVI<0x72f7c000>; +def VREPLVEI_W : LSX2RI2_VVI<0x72f7e000>; +def VREPLVEI_D : LSX2RI1_VVI<0x72f7f000>; + +def VBSLL_V : LSX2RI5_VVI<0x728e0000>; +def VBSRL_V : LSX2RI5_VVI<0x728e8000>; + +def VPACKEV_B : LSX3R_VVV<0x71160000>; +def VPACKEV_H : LSX3R_VVV<0x71168000>; +def VPACKEV_W : LSX3R_VVV<0x71170000>; +def VPACKEV_D : LSX3R_VVV<0x71178000>; +def VPACKOD_B : LSX3R_VVV<0x71180000>; +def VPACKOD_H : LSX3R_VVV<0x71188000>; +def VPACKOD_W : LSX3R_VVV<0x71190000>; +def VPACKOD_D : LSX3R_VVV<0x71198000>; + +def VPICKEV_B : LSX3R_VVV<0x711e0000>; +def VPICKEV_H : LSX3R_VVV<0x711e8000>; +def VPICKEV_W : LSX3R_VVV<0x711f0000>; +def VPICKEV_D : LSX3R_VVV<0x711f8000>; +def VPICKOD_B : LSX3R_VVV<0x71200000>; +def VPICKOD_H : LSX3R_VVV<0x71208000>; +def VPICKOD_W : LSX3R_VVV<0x71210000>; +def VPICKOD_D : LSX3R_VVV<0x71218000>; + +def VILVL_B : LSX3R_VVV<0x711a0000>; +def VILVL_H : LSX3R_VVV<0x711a8000>; +def VILVL_W : LSX3R_VVV<0x711b0000>; +def VILVL_D : LSX3R_VVV<0x711b8000>; +def VILVH_B : LSX3R_VVV<0x711c0000>; +def VILVH_H : LSX3R_VVV<0x711c8000>; +def VILVH_W : LSX3R_VVV<0x711d0000>; +def VILVH_D : LSX3R_VVV<0x711d8000>; + +def VSHUF_B : LSX4R_VVVV<0x0d500000>; + +def VSHUF_H : LSX3R_VVVV<0x717a8000>; +def VSHUF_W : LSX3R_VVVV<0x717b0000>; +def VSHUF_D : LSX3R_VVVV<0x717b8000>; + +def VSHUF4I_B : LSX2RI8_VVI<0x73900000>; +def VSHUF4I_H : LSX2RI8_VVI<0x73940000>; +def VSHUF4I_W : LSX2RI8_VVI<0x73980000>; +def VSHUF4I_D : LSX2RI8_VVVI<0x739c0000>; + +def VPERMI_W : LSX2RI8_VVVI<0x73e40000>; + +def VEXTRINS_D : LSX2RI8_VVVI<0x73800000>; +def VEXTRINS_W : LSX2RI8_VVVI<0x73840000>; +def VEXTRINS_H : LSX2RI8_VVVI<0x73880000>; +def VEXTRINS_B : LSX2RI8_VVVI<0x738c0000>; +} // mayLoad = 0, mayStore = 0 + +let mayLoad = 1, mayStore = 0 in { +def VLD : LSX2RI12_Load<0x2c000000>; +def VLDX : LSX3R_Load<0x38400000>; + +def VLDREPL_B : LSX2RI12_Load<0x30800000>; +def VLDREPL_H : LSX2RI11_Load<0x30400000>; +def VLDREPL_W : LSX2RI10_Load<0x30200000>; +def VLDREPL_D : LSX2RI9_Load<0x30100000>; +} // mayLoad = 1, mayStore = 0 + +let mayLoad = 0, mayStore = 1 in { +def VST : LSX2RI12_Store<0x2c400000>; +def VSTX : LSX3R_Store<0x38440000>; + +def VSTELM_B : LSX2RI8I4_VRII<0x31800000>; +def VSTELM_H : LSX2RI8I3_VRII<0x31400000, simm8_lsl1>; +def VSTELM_W : LSX2RI8I2_VRII<0x31200000, simm8_lsl2>; +def VSTELM_D : LSX2RI8I1_VRII<0x31100000, simm8_lsl3>; +} // mayLoad = 0, mayStore = 1 + +} // hasSideEffects = 0, Predicates = [HasExtLSX] + +/// Pseudo-instructions + +let Predicates = [HasExtLSX] in { + +let hasSideEffects = 0, mayLoad = 0, mayStore = 0, isCodeGenOnly = 0, + isAsmParserOnly = 1 in { +def PseudoVREPLI_B : Pseudo<(outs LSX128:$vd), (ins simm10:$imm), [], + "vrepli.b", "$vd, $imm">; +def PseudoVREPLI_H : 
Pseudo<(outs LSX128:$vd), (ins simm10:$imm), [], + "vrepli.h", "$vd, $imm">; +def PseudoVREPLI_W : Pseudo<(outs LSX128:$vd), (ins simm10:$imm), [], + "vrepli.w", "$vd, $imm">; +def PseudoVREPLI_D : Pseudo<(outs LSX128:$vd), (ins simm10:$imm), [], + "vrepli.d", "$vd, $imm">; +} + +def PseudoVBNZ_B : VecCond; +def PseudoVBNZ_H : VecCond; +def PseudoVBNZ_W : VecCond; +def PseudoVBNZ_D : VecCond; +def PseudoVBNZ : VecCond; + +def PseudoVBZ_B : VecCond; +def PseudoVBZ_H : VecCond; +def PseudoVBZ_W : VecCond; +def PseudoVBZ_D : VecCond; +def PseudoVBZ : VecCond; + +} // Predicates = [HasExtLSX] + +multiclass PatVr { + def : Pat<(v16i8 (OpNode (v16i8 LSX128:$vj))), + (!cast(Inst#"_B") LSX128:$vj)>; + def : Pat<(v8i16 (OpNode (v8i16 LSX128:$vj))), + (!cast(Inst#"_H") LSX128:$vj)>; + def : Pat<(v4i32 (OpNode (v4i32 LSX128:$vj))), + (!cast(Inst#"_W") LSX128:$vj)>; + def : Pat<(v2i64 (OpNode (v2i64 LSX128:$vj))), + (!cast(Inst#"_D") LSX128:$vj)>; +} + +multiclass PatVrF { + def : Pat<(v4f32 (OpNode (v4f32 LSX128:$vj))), + (!cast(Inst#"_S") LSX128:$vj)>; + def : Pat<(v2f64 (OpNode (v2f64 LSX128:$vj))), + (!cast(Inst#"_D") LSX128:$vj)>; +} + +multiclass PatVrVr { + def : Pat<(OpNode (v16i8 LSX128:$vj), (v16i8 LSX128:$vk)), + (!cast(Inst#"_B") LSX128:$vj, LSX128:$vk)>; + def : Pat<(OpNode (v8i16 LSX128:$vj), (v8i16 LSX128:$vk)), + (!cast(Inst#"_H") LSX128:$vj, LSX128:$vk)>; + def : Pat<(OpNode (v4i32 LSX128:$vj), (v4i32 LSX128:$vk)), + (!cast(Inst#"_W") LSX128:$vj, LSX128:$vk)>; + def : Pat<(OpNode (v2i64 LSX128:$vj), (v2i64 LSX128:$vk)), + (!cast(Inst#"_D") LSX128:$vj, LSX128:$vk)>; +} + +multiclass PatVrVrF { + def : Pat<(OpNode (v4f32 LSX128:$vj), (v4f32 LSX128:$vk)), + (!cast(Inst#"_S") LSX128:$vj, LSX128:$vk)>; + def : Pat<(OpNode (v2f64 LSX128:$vj), (v2f64 LSX128:$vk)), + (!cast(Inst#"_D") LSX128:$vj, LSX128:$vk)>; +} + +multiclass PatVrVrU { + def : Pat<(OpNode (v16i8 LSX128:$vj), (v16i8 LSX128:$vk)), + (!cast(Inst#"_BU") LSX128:$vj, LSX128:$vk)>; + def : Pat<(OpNode (v8i16 LSX128:$vj), (v8i16 LSX128:$vk)), + (!cast(Inst#"_HU") LSX128:$vj, LSX128:$vk)>; + def : Pat<(OpNode (v4i32 LSX128:$vj), (v4i32 LSX128:$vk)), + (!cast(Inst#"_WU") LSX128:$vj, LSX128:$vk)>; + def : Pat<(OpNode (v2i64 LSX128:$vj), (v2i64 LSX128:$vk)), + (!cast(Inst#"_DU") LSX128:$vj, LSX128:$vk)>; +} + +multiclass PatVrSimm5 { + def : Pat<(OpNode (v16i8 LSX128:$vj), (v16i8 (SplatPat_simm5 simm5:$imm))), + (!cast(Inst#"_B") LSX128:$vj, simm5:$imm)>; + def : Pat<(OpNode (v8i16 LSX128:$vj), (v8i16 (SplatPat_simm5 simm5:$imm))), + (!cast(Inst#"_H") LSX128:$vj, simm5:$imm)>; + def : Pat<(OpNode (v4i32 LSX128:$vj), (v4i32 (SplatPat_simm5 simm5:$imm))), + (!cast(Inst#"_W") LSX128:$vj, simm5:$imm)>; + def : Pat<(OpNode (v2i64 LSX128:$vj), (v2i64 (SplatPat_simm5 simm5:$imm))), + (!cast(Inst#"_D") LSX128:$vj, simm5:$imm)>; +} + +multiclass PatVrUimm5 { + def : Pat<(OpNode (v16i8 LSX128:$vj), (v16i8 (SplatPat_uimm5 uimm5:$imm))), + (!cast(Inst#"_BU") LSX128:$vj, uimm5:$imm)>; + def : Pat<(OpNode (v8i16 LSX128:$vj), (v8i16 (SplatPat_uimm5 uimm5:$imm))), + (!cast(Inst#"_HU") LSX128:$vj, uimm5:$imm)>; + def : Pat<(OpNode (v4i32 LSX128:$vj), (v4i32 (SplatPat_uimm5 uimm5:$imm))), + (!cast(Inst#"_WU") LSX128:$vj, uimm5:$imm)>; + def : Pat<(OpNode (v2i64 LSX128:$vj), (v2i64 (SplatPat_uimm5 uimm5:$imm))), + (!cast(Inst#"_DU") LSX128:$vj, uimm5:$imm)>; +} + +multiclass PatVrVrVr { + def : Pat<(OpNode (v16i8 LSX128:$vd), (v16i8 LSX128:$vj), (v16i8 LSX128:$vk)), + (!cast(Inst#"_B") LSX128:$vd, LSX128:$vj, LSX128:$vk)>; + def : Pat<(OpNode (v8i16 
LSX128:$vd), (v8i16 LSX128:$vj), (v8i16 LSX128:$vk)), + (!cast(Inst#"_H") LSX128:$vd, LSX128:$vj, LSX128:$vk)>; + def : Pat<(OpNode (v4i32 LSX128:$vd), (v4i32 LSX128:$vj), (v4i32 LSX128:$vk)), + (!cast(Inst#"_W") LSX128:$vd, LSX128:$vj, LSX128:$vk)>; + def : Pat<(OpNode (v2i64 LSX128:$vd), (v2i64 LSX128:$vj), (v2i64 LSX128:$vk)), + (!cast(Inst#"_D") LSX128:$vd, LSX128:$vj, LSX128:$vk)>; +} + +multiclass PatShiftVrVr { + def : Pat<(OpNode (v16i8 LSX128:$vj), (and vsplati8_imm_eq_7, + (v16i8 LSX128:$vk))), + (!cast(Inst#"_B") LSX128:$vj, LSX128:$vk)>; + def : Pat<(OpNode (v8i16 LSX128:$vj), (and vsplati16_imm_eq_15, + (v8i16 LSX128:$vk))), + (!cast(Inst#"_H") LSX128:$vj, LSX128:$vk)>; + def : Pat<(OpNode (v4i32 LSX128:$vj), (and vsplati32_imm_eq_31, + (v4i32 LSX128:$vk))), + (!cast(Inst#"_W") LSX128:$vj, LSX128:$vk)>; + def : Pat<(OpNode (v2i64 LSX128:$vj), (and vsplati64_imm_eq_63, + (v2i64 LSX128:$vk))), + (!cast(Inst#"_D") LSX128:$vj, LSX128:$vk)>; +} + +multiclass PatShiftVrUimm { + def : Pat<(OpNode (v16i8 LSX128:$vj), (v16i8 (SplatPat_uimm3 uimm3:$imm))), + (!cast(Inst#"_B") LSX128:$vj, uimm3:$imm)>; + def : Pat<(OpNode (v8i16 LSX128:$vj), (v8i16 (SplatPat_uimm4 uimm4:$imm))), + (!cast(Inst#"_H") LSX128:$vj, uimm4:$imm)>; + def : Pat<(OpNode (v4i32 LSX128:$vj), (v4i32 (SplatPat_uimm5 uimm5:$imm))), + (!cast(Inst#"_W") LSX128:$vj, uimm5:$imm)>; + def : Pat<(OpNode (v2i64 LSX128:$vj), (v2i64 (SplatPat_uimm6 uimm6:$imm))), + (!cast(Inst#"_D") LSX128:$vj, uimm6:$imm)>; +} + +multiclass PatCCVrSimm5 { + def : Pat<(v16i8 (setcc (v16i8 LSX128:$vj), + (v16i8 (SplatPat_simm5 simm5:$imm)), CC)), + (!cast(Inst#"_B") LSX128:$vj, simm5:$imm)>; + def : Pat<(v8i16 (setcc (v8i16 LSX128:$vj), + (v8i16 (SplatPat_simm5 simm5:$imm)), CC)), + (!cast(Inst#"_H") LSX128:$vj, simm5:$imm)>; + def : Pat<(v4i32 (setcc (v4i32 LSX128:$vj), + (v4i32 (SplatPat_simm5 simm5:$imm)), CC)), + (!cast(Inst#"_W") LSX128:$vj, simm5:$imm)>; + def : Pat<(v2i64 (setcc (v2i64 LSX128:$vj), + (v2i64 (SplatPat_simm5 simm5:$imm)), CC)), + (!cast(Inst#"_D") LSX128:$vj, simm5:$imm)>; +} + +multiclass PatCCVrUimm5 { + def : Pat<(v16i8 (setcc (v16i8 LSX128:$vj), + (v16i8 (SplatPat_uimm5 uimm5:$imm)), CC)), + (!cast(Inst#"_BU") LSX128:$vj, uimm5:$imm)>; + def : Pat<(v8i16 (setcc (v8i16 LSX128:$vj), + (v8i16 (SplatPat_uimm5 uimm5:$imm)), CC)), + (!cast(Inst#"_HU") LSX128:$vj, uimm5:$imm)>; + def : Pat<(v4i32 (setcc (v4i32 LSX128:$vj), + (v4i32 (SplatPat_uimm5 uimm5:$imm)), CC)), + (!cast(Inst#"_WU") LSX128:$vj, uimm5:$imm)>; + def : Pat<(v2i64 (setcc (v2i64 LSX128:$vj), + (v2i64 (SplatPat_uimm5 uimm5:$imm)), CC)), + (!cast(Inst#"_DU") LSX128:$vj, uimm5:$imm)>; +} + +multiclass PatCCVrVr { + def : Pat<(v16i8 (setcc (v16i8 LSX128:$vj), (v16i8 LSX128:$vk), CC)), + (!cast(Inst#"_B") LSX128:$vj, LSX128:$vk)>; + def : Pat<(v8i16 (setcc (v8i16 LSX128:$vj), (v8i16 LSX128:$vk), CC)), + (!cast(Inst#"_H") LSX128:$vj, LSX128:$vk)>; + def : Pat<(v4i32 (setcc (v4i32 LSX128:$vj), (v4i32 LSX128:$vk), CC)), + (!cast(Inst#"_W") LSX128:$vj, LSX128:$vk)>; + def : Pat<(v2i64 (setcc (v2i64 LSX128:$vj), (v2i64 LSX128:$vk), CC)), + (!cast(Inst#"_D") LSX128:$vj, LSX128:$vk)>; +} + +multiclass PatCCVrVrU { + def : Pat<(v16i8 (setcc (v16i8 LSX128:$vj), (v16i8 LSX128:$vk), CC)), + (!cast(Inst#"_BU") LSX128:$vj, LSX128:$vk)>; + def : Pat<(v8i16 (setcc (v8i16 LSX128:$vj), (v8i16 LSX128:$vk), CC)), + (!cast(Inst#"_HU") LSX128:$vj, LSX128:$vk)>; + def : Pat<(v4i32 (setcc (v4i32 LSX128:$vj), (v4i32 LSX128:$vk), CC)), + (!cast(Inst#"_WU") LSX128:$vj, LSX128:$vk)>; + def : 
Pat<(v2i64 (setcc (v2i64 LSX128:$vj), (v2i64 LSX128:$vk), CC)), + (!cast(Inst#"_DU") LSX128:$vj, LSX128:$vk)>; +} + +multiclass PatCCVrVrF { + def : Pat<(v4i32 (setcc (v4f32 LSX128:$vj), (v4f32 LSX128:$vk), CC)), + (!cast(Inst#"_S") LSX128:$vj, LSX128:$vk)>; + def : Pat<(v2i64 (setcc (v2f64 LSX128:$vj), (v2f64 LSX128:$vk), CC)), + (!cast(Inst#"_D") LSX128:$vj, LSX128:$vk)>; +} + +let Predicates = [HasExtLSX] in { + +// VADD_{B/H/W/D} +defm : PatVrVr; +// VSUB_{B/H/W/D} +defm : PatVrVr; + +// VADDI_{B/H/W/D}U +defm : PatVrUimm5; +// VSUBI_{B/H/W/D}U +defm : PatVrUimm5; + +// VNEG_{B/H/W/D} +def : Pat<(sub immAllZerosV, (v16i8 LSX128:$vj)), (VNEG_B LSX128:$vj)>; +def : Pat<(sub immAllZerosV, (v8i16 LSX128:$vj)), (VNEG_H LSX128:$vj)>; +def : Pat<(sub immAllZerosV, (v4i32 LSX128:$vj)), (VNEG_W LSX128:$vj)>; +def : Pat<(sub immAllZerosV, (v2i64 LSX128:$vj)), (VNEG_D LSX128:$vj)>; + +// VMAX[I]_{B/H/W/D}[U] +defm : PatVrVr; +defm : PatVrVrU; +defm : PatVrSimm5; +defm : PatVrUimm5; + +// VMIN[I]_{B/H/W/D}[U] +defm : PatVrVr; +defm : PatVrVrU; +defm : PatVrSimm5; +defm : PatVrUimm5; + +// VMUL_{B/H/W/D} +defm : PatVrVr; + +// VMUH_{B/H/W/D}[U] +defm : PatVrVr; +defm : PatVrVrU; + +// VMADD_{B/H/W/D} +defm : PatVrVrVr; +// VMSUB_{B/H/W/D} +defm : PatVrVrVr; + +// VDIV_{B/H/W/D}[U] +defm : PatVrVr; +defm : PatVrVrU; + +// VMOD_{B/H/W/D}[U] +defm : PatVrVr; +defm : PatVrVrU; + +// VAND_V +foreach vt = [v16i8, v8i16, v4i32, v2i64] in +def : Pat<(and (vt LSX128:$vj), (vt LSX128:$vk)), + (VAND_V LSX128:$vj, LSX128:$vk)>; +// VOR_V +foreach vt = [v16i8, v8i16, v4i32, v2i64] in +def : Pat<(or (vt LSX128:$vj), (vt LSX128:$vk)), + (VOR_V LSX128:$vj, LSX128:$vk)>; +// VXOR_V +foreach vt = [v16i8, v8i16, v4i32, v2i64] in +def : Pat<(xor (vt LSX128:$vj), (vt LSX128:$vk)), + (VXOR_V LSX128:$vj, LSX128:$vk)>; +// VNOR_V +foreach vt = [v16i8, v8i16, v4i32, v2i64] in +def : Pat<(vnot (or (vt LSX128:$vj), (vt LSX128:$vk))), + (VNOR_V LSX128:$vj, LSX128:$vk)>; + +// VANDI_B +def : Pat<(and (v16i8 LSX128:$vj), (v16i8 (SplatPat_uimm8 uimm8:$imm))), + (VANDI_B LSX128:$vj, uimm8:$imm)>; +// VORI_B +def : Pat<(or (v16i8 LSX128:$vj), (v16i8 (SplatPat_uimm8 uimm8:$imm))), + (VORI_B LSX128:$vj, uimm8:$imm)>; + +// VXORI_B +def : Pat<(xor (v16i8 LSX128:$vj), (v16i8 (SplatPat_uimm8 uimm8:$imm))), + (VXORI_B LSX128:$vj, uimm8:$imm)>; + +// VSLL[I]_{B/H/W/D} +defm : PatVrVr; +defm : PatShiftVrVr; +defm : PatShiftVrUimm; + +// VSRL[I]_{B/H/W/D} +defm : PatVrVr; +defm : PatShiftVrVr; +defm : PatShiftVrUimm; + +// VSRA[I]_{B/H/W/D} +defm : PatVrVr; +defm : PatShiftVrVr; +defm : PatShiftVrUimm; + +// VCLZ_{B/H/W/D} +defm : PatVr; + +// VPCNT_{B/H/W/D} +defm : PatVr; + +// VBITCLR_{B/H/W/D} +def : Pat<(and v16i8:$vj, (vnot (shl vsplat_imm_eq_1, v16i8:$vk))), + (v16i8 (VBITCLR_B v16i8:$vj, v16i8:$vk))>; +def : Pat<(and v8i16:$vj, (vnot (shl vsplat_imm_eq_1, v8i16:$vk))), + (v8i16 (VBITCLR_H v8i16:$vj, v8i16:$vk))>; +def : Pat<(and v4i32:$vj, (vnot (shl vsplat_imm_eq_1, v4i32:$vk))), + (v4i32 (VBITCLR_W v4i32:$vj, v4i32:$vk))>; +def : Pat<(and v2i64:$vj, (vnot (shl vsplat_imm_eq_1, v2i64:$vk))), + (v2i64 (VBITCLR_D v2i64:$vj, v2i64:$vk))>; +def : Pat<(and v16i8:$vj, (vnot (shl vsplat_imm_eq_1, + (vsplati8imm7 v16i8:$vk)))), + (v16i8 (VBITCLR_B v16i8:$vj, v16i8:$vk))>; +def : Pat<(and v8i16:$vj, (vnot (shl vsplat_imm_eq_1, + (vsplati16imm15 v8i16:$vk)))), + (v8i16 (VBITCLR_H v8i16:$vj, v8i16:$vk))>; +def : Pat<(and v4i32:$vj, (vnot (shl vsplat_imm_eq_1, + (vsplati32imm31 v4i32:$vk)))), + (v4i32 (VBITCLR_W v4i32:$vj, v4i32:$vk))>; +def 
: Pat<(and v2i64:$vj, (vnot (shl vsplat_imm_eq_1, + (vsplati64imm63 v2i64:$vk)))), + (v2i64 (VBITCLR_D v2i64:$vj, v2i64:$vk))>; + +// VBITCLRI_{B/H/W/D} +def : Pat<(and (v16i8 LSX128:$vj), (v16i8 (vsplat_uimm_inv_pow2 uimm3:$imm))), + (VBITCLRI_B LSX128:$vj, uimm3:$imm)>; +def : Pat<(and (v8i16 LSX128:$vj), (v8i16 (vsplat_uimm_inv_pow2 uimm4:$imm))), + (VBITCLRI_H LSX128:$vj, uimm4:$imm)>; +def : Pat<(and (v4i32 LSX128:$vj), (v4i32 (vsplat_uimm_inv_pow2 uimm5:$imm))), + (VBITCLRI_W LSX128:$vj, uimm5:$imm)>; +def : Pat<(and (v2i64 LSX128:$vj), (v2i64 (vsplat_uimm_inv_pow2 uimm6:$imm))), + (VBITCLRI_D LSX128:$vj, uimm6:$imm)>; + +// VBITSET_{B/H/W/D} +def : Pat<(or v16i8:$vj, (shl vsplat_imm_eq_1, v16i8:$vk)), + (v16i8 (VBITSET_B v16i8:$vj, v16i8:$vk))>; +def : Pat<(or v8i16:$vj, (shl vsplat_imm_eq_1, v8i16:$vk)), + (v8i16 (VBITSET_H v8i16:$vj, v8i16:$vk))>; +def : Pat<(or v4i32:$vj, (shl vsplat_imm_eq_1, v4i32:$vk)), + (v4i32 (VBITSET_W v4i32:$vj, v4i32:$vk))>; +def : Pat<(or v2i64:$vj, (shl vsplat_imm_eq_1, v2i64:$vk)), + (v2i64 (VBITSET_D v2i64:$vj, v2i64:$vk))>; +def : Pat<(or v16i8:$vj, (shl vsplat_imm_eq_1, (vsplati8imm7 v16i8:$vk))), + (v16i8 (VBITSET_B v16i8:$vj, v16i8:$vk))>; +def : Pat<(or v8i16:$vj, (shl vsplat_imm_eq_1, (vsplati16imm15 v8i16:$vk))), + (v8i16 (VBITSET_H v8i16:$vj, v8i16:$vk))>; +def : Pat<(or v4i32:$vj, (shl vsplat_imm_eq_1, (vsplati32imm31 v4i32:$vk))), + (v4i32 (VBITSET_W v4i32:$vj, v4i32:$vk))>; +def : Pat<(or v2i64:$vj, (shl vsplat_imm_eq_1, (vsplati64imm63 v2i64:$vk))), + (v2i64 (VBITSET_D v2i64:$vj, v2i64:$vk))>; + +// VBITSETI_{B/H/W/D} +def : Pat<(or (v16i8 LSX128:$vj), (v16i8 (vsplat_uimm_pow2 uimm3:$imm))), + (VBITSETI_B LSX128:$vj, uimm3:$imm)>; +def : Pat<(or (v8i16 LSX128:$vj), (v8i16 (vsplat_uimm_pow2 uimm4:$imm))), + (VBITSETI_H LSX128:$vj, uimm4:$imm)>; +def : Pat<(or (v4i32 LSX128:$vj), (v4i32 (vsplat_uimm_pow2 uimm5:$imm))), + (VBITSETI_W LSX128:$vj, uimm5:$imm)>; +def : Pat<(or (v2i64 LSX128:$vj), (v2i64 (vsplat_uimm_pow2 uimm6:$imm))), + (VBITSETI_D LSX128:$vj, uimm6:$imm)>; + +// VBITREV_{B/H/W/D} +def : Pat<(xor v16i8:$vj, (shl vsplat_imm_eq_1, v16i8:$vk)), + (v16i8 (VBITREV_B v16i8:$vj, v16i8:$vk))>; +def : Pat<(xor v8i16:$vj, (shl vsplat_imm_eq_1, v8i16:$vk)), + (v8i16 (VBITREV_H v8i16:$vj, v8i16:$vk))>; +def : Pat<(xor v4i32:$vj, (shl vsplat_imm_eq_1, v4i32:$vk)), + (v4i32 (VBITREV_W v4i32:$vj, v4i32:$vk))>; +def : Pat<(xor v2i64:$vj, (shl vsplat_imm_eq_1, v2i64:$vk)), + (v2i64 (VBITREV_D v2i64:$vj, v2i64:$vk))>; +def : Pat<(xor v16i8:$vj, (shl vsplat_imm_eq_1, (vsplati8imm7 v16i8:$vk))), + (v16i8 (VBITREV_B v16i8:$vj, v16i8:$vk))>; +def : Pat<(xor v8i16:$vj, (shl vsplat_imm_eq_1, (vsplati16imm15 v8i16:$vk))), + (v8i16 (VBITREV_H v8i16:$vj, v8i16:$vk))>; +def : Pat<(xor v4i32:$vj, (shl vsplat_imm_eq_1, (vsplati32imm31 v4i32:$vk))), + (v4i32 (VBITREV_W v4i32:$vj, v4i32:$vk))>; +def : Pat<(xor v2i64:$vj, (shl vsplat_imm_eq_1, (vsplati64imm63 v2i64:$vk))), + (v2i64 (VBITREV_D v2i64:$vj, v2i64:$vk))>; + +// VBITREVI_{B/H/W/D} +def : Pat<(xor (v16i8 LSX128:$vj), (v16i8 (vsplat_uimm_pow2 uimm3:$imm))), + (VBITREVI_B LSX128:$vj, uimm3:$imm)>; +def : Pat<(xor (v8i16 LSX128:$vj), (v8i16 (vsplat_uimm_pow2 uimm4:$imm))), + (VBITREVI_H LSX128:$vj, uimm4:$imm)>; +def : Pat<(xor (v4i32 LSX128:$vj), (v4i32 (vsplat_uimm_pow2 uimm5:$imm))), + (VBITREVI_W LSX128:$vj, uimm5:$imm)>; +def : Pat<(xor (v2i64 LSX128:$vj), (v2i64 (vsplat_uimm_pow2 uimm6:$imm))), + (VBITREVI_D LSX128:$vj, uimm6:$imm)>; + +// VFADD_{S/D} +defm : PatVrVrF; + +// VFSUB_{S/D} +defm : 
PatVrVrF; + +// VFMUL_{S/D} +defm : PatVrVrF; + +// VFDIV_{S/D} +defm : PatVrVrF; + +// VFMADD_{S/D} +def : Pat<(fma v4f32:$vj, v4f32:$vk, v4f32:$va), + (VFMADD_S v4f32:$vj, v4f32:$vk, v4f32:$va)>; +def : Pat<(fma v2f64:$vj, v2f64:$vk, v2f64:$va), + (VFMADD_D v2f64:$vj, v2f64:$vk, v2f64:$va)>; + +// VFMSUB_{S/D} +def : Pat<(fma v4f32:$vj, v4f32:$vk, (fneg v4f32:$va)), + (VFMSUB_S v4f32:$vj, v4f32:$vk, v4f32:$va)>; +def : Pat<(fma v2f64:$vj, v2f64:$vk, (fneg v2f64:$va)), + (VFMSUB_D v2f64:$vj, v2f64:$vk, v2f64:$va)>; + +// VFNMADD_{S/D} +def : Pat<(fneg (fma v4f32:$vj, v4f32:$vk, v4f32:$va)), + (VFNMADD_S v4f32:$vj, v4f32:$vk, v4f32:$va)>; +def : Pat<(fneg (fma v2f64:$vj, v2f64:$vk, v2f64:$va)), + (VFNMADD_D v2f64:$vj, v2f64:$vk, v2f64:$va)>; +def : Pat<(fma_nsz (fneg v4f32:$vj), v4f32:$vk, (fneg v4f32:$va)), + (VFNMADD_S v4f32:$vj, v4f32:$vk, v4f32:$va)>; +def : Pat<(fma_nsz (fneg v2f64:$vj), v2f64:$vk, (fneg v2f64:$va)), + (VFNMADD_D v2f64:$vj, v2f64:$vk, v2f64:$va)>; + +// VFNMSUB_{S/D} +def : Pat<(fneg (fma v4f32:$vj, v4f32:$vk, (fneg v4f32:$va))), + (VFNMSUB_S v4f32:$vj, v4f32:$vk, v4f32:$va)>; +def : Pat<(fneg (fma v2f64:$vj, v2f64:$vk, (fneg v2f64:$va))), + (VFNMSUB_D v2f64:$vj, v2f64:$vk, v2f64:$va)>; +def : Pat<(fma_nsz (fneg v4f32:$vj), v4f32:$vk, v4f32:$va), + (VFNMSUB_S v4f32:$vj, v4f32:$vk, v4f32:$va)>; +def : Pat<(fma_nsz (fneg v2f64:$vj), v2f64:$vk, v2f64:$va), + (VFNMSUB_D v2f64:$vj, v2f64:$vk, v2f64:$va)>; + +// VFSQRT_{S/D} +defm : PatVrF; + +// VFRECIP_{S/D} +def : Pat<(fdiv vsplatf32_fpimm_eq_1, v4f32:$vj), + (VFRECIP_S v4f32:$vj)>; +def : Pat<(fdiv vsplatf64_fpimm_eq_1, v2f64:$vj), + (VFRECIP_D v2f64:$vj)>; + +// VFRSQRT_{S/D} +def : Pat<(fdiv vsplatf32_fpimm_eq_1, (fsqrt v4f32:$vj)), + (VFRSQRT_S v4f32:$vj)>; +def : Pat<(fdiv vsplatf64_fpimm_eq_1, (fsqrt v2f64:$vj)), + (VFRSQRT_D v2f64:$vj)>; + +// VSEQ[I]_{B/H/W/D} +defm : PatCCVrSimm5; +defm : PatCCVrVr; + +// VSLE[I]_{B/H/W/D}[U] +defm : PatCCVrSimm5; +defm : PatCCVrUimm5; +defm : PatCCVrVr; +defm : PatCCVrVrU; + +// VSLT[I]_{B/H/W/D}[U] +defm : PatCCVrSimm5; +defm : PatCCVrUimm5; +defm : PatCCVrVr; +defm : PatCCVrVrU; + +// VFCMP.cond.{S/D} +defm : PatCCVrVrF; +defm : PatCCVrVrF; +defm : PatCCVrVrF; + +defm : PatCCVrVrF; +defm : PatCCVrVrF; +defm : PatCCVrVrF; + +defm : PatCCVrVrF; +defm : PatCCVrVrF; +defm : PatCCVrVrF; + +defm : PatCCVrVrF; +defm : PatCCVrVrF; +defm : PatCCVrVrF; + +defm : PatCCVrVrF; +defm : PatCCVrVrF; + +// VINSGR2VR_{B/H/W/D} +def : Pat<(vector_insert v16i8:$vd, GRLenVT:$rj, uimm4:$imm), + (VINSGR2VR_B v16i8:$vd, GRLenVT:$rj, uimm4:$imm)>; +def : Pat<(vector_insert v8i16:$vd, GRLenVT:$rj, uimm3:$imm), + (VINSGR2VR_H v8i16:$vd, GRLenVT:$rj, uimm3:$imm)>; +def : Pat<(vector_insert v4i32:$vd, GRLenVT:$rj, uimm2:$imm), + (VINSGR2VR_W v4i32:$vd, GRLenVT:$rj, uimm2:$imm)>; +def : Pat<(vector_insert v2i64:$vd, GRLenVT:$rj, uimm1:$imm), + (VINSGR2VR_D v2i64:$vd, GRLenVT:$rj, uimm1:$imm)>; + +def : Pat<(vector_insert v4f32:$vd, FPR32:$fj, uimm2:$imm), + (VINSGR2VR_W $vd, (COPY_TO_REGCLASS FPR32:$fj, GPR), uimm2:$imm)>; +def : Pat<(vector_insert v2f64:$vd, FPR64:$fj, uimm1:$imm), + (VINSGR2VR_D $vd, (COPY_TO_REGCLASS FPR64:$fj, GPR), uimm1:$imm)>; + +// VPICKVE2GR_{B/H/W}[U] +def : Pat<(loongarch_vpick_sext_elt v16i8:$vd, uimm4:$imm, i8), + (VPICKVE2GR_B v16i8:$vd, uimm4:$imm)>; +def : Pat<(loongarch_vpick_sext_elt v8i16:$vd, uimm3:$imm, i16), + (VPICKVE2GR_H v8i16:$vd, uimm3:$imm)>; +def : Pat<(loongarch_vpick_sext_elt v4i32:$vd, uimm2:$imm, i32), + (VPICKVE2GR_W v4i32:$vd, uimm2:$imm)>; + +def : 
Pat<(loongarch_vpick_zext_elt v16i8:$vd, uimm4:$imm, i8), + (VPICKVE2GR_BU v16i8:$vd, uimm4:$imm)>; +def : Pat<(loongarch_vpick_zext_elt v8i16:$vd, uimm3:$imm, i16), + (VPICKVE2GR_HU v8i16:$vd, uimm3:$imm)>; +def : Pat<(loongarch_vpick_zext_elt v4i32:$vd, uimm2:$imm, i32), + (VPICKVE2GR_WU v4i32:$vd, uimm2:$imm)>; + +// VREPLGR2VR_{B/H/W/D} +def : Pat<(lsxsplati8 GPR:$rj), (VREPLGR2VR_B GPR:$rj)>; +def : Pat<(lsxsplati16 GPR:$rj), (VREPLGR2VR_H GPR:$rj)>; +def : Pat<(lsxsplati32 GPR:$rj), (VREPLGR2VR_W GPR:$rj)>; +def : Pat<(lsxsplati64 GPR:$rj), (VREPLGR2VR_D GPR:$rj)>; + +// VREPLVE_{B/H/W/D} +def : Pat<(loongarch_vreplve v16i8:$vj, GRLenVT:$rk), + (VREPLVE_B v16i8:$vj, GRLenVT:$rk)>; +def : Pat<(loongarch_vreplve v8i16:$vj, GRLenVT:$rk), + (VREPLVE_H v8i16:$vj, GRLenVT:$rk)>; +def : Pat<(loongarch_vreplve v4i32:$vj, GRLenVT:$rk), + (VREPLVE_W v4i32:$vj, GRLenVT:$rk)>; +def : Pat<(loongarch_vreplve v2i64:$vj, GRLenVT:$rk), + (VREPLVE_D v2i64:$vj, GRLenVT:$rk)>; + +// VSHUF_{B/H/W/D} +def : Pat<(loongarch_vshuf v16i8:$va, v16i8:$vj, v16i8:$vk), + (VSHUF_B v16i8:$vj, v16i8:$vk, v16i8:$va)>; +def : Pat<(loongarch_vshuf v8i16:$vd, v8i16:$vj, v8i16:$vk), + (VSHUF_H v8i16:$vd, v8i16:$vj, v8i16:$vk)>; +def : Pat<(loongarch_vshuf v4i32:$vd, v4i32:$vj, v4i32:$vk), + (VSHUF_W v4i32:$vd, v4i32:$vj, v4i32:$vk)>; +def : Pat<(loongarch_vshuf v2i64:$vd, v2i64:$vj, v2i64:$vk), + (VSHUF_D v2i64:$vd, v2i64:$vj, v2i64:$vk)>; +def : Pat<(loongarch_vshuf v4i32:$vd, v4f32:$vj, v4f32:$vk), + (VSHUF_W v4i32:$vd, v4f32:$vj, v4f32:$vk)>; +def : Pat<(loongarch_vshuf v2i64:$vd, v2f64:$vj, v2f64:$vk), + (VSHUF_D v2i64:$vd, v2f64:$vj, v2f64:$vk)>; + +// VPICKEV_{B/H/W/D} +def : Pat<(loongarch_vpickev v16i8:$vj, v16i8:$vk), + (VPICKEV_B v16i8:$vj, v16i8:$vk)>; +def : Pat<(loongarch_vpickev v8i16:$vj, v8i16:$vk), + (VPICKEV_H v8i16:$vj, v8i16:$vk)>; +def : Pat<(loongarch_vpickev v4i32:$vj, v4i32:$vk), + (VPICKEV_W v4i32:$vj, v4i32:$vk)>; +def : Pat<(loongarch_vpickev v2i64:$vj, v2i64:$vk), + (VPICKEV_D v2i64:$vj, v2i64:$vk)>; +def : Pat<(loongarch_vpickev v4f32:$vj, v4f32:$vk), + (VPICKEV_W v4f32:$vj, v4f32:$vk)>; +def : Pat<(loongarch_vpickev v2f64:$vj, v2f64:$vk), + (VPICKEV_D v2f64:$vj, v2f64:$vk)>; + +// VPICKOD_{B/H/W/D} +def : Pat<(loongarch_vpickod v16i8:$vj, v16i8:$vk), + (VPICKOD_B v16i8:$vj, v16i8:$vk)>; +def : Pat<(loongarch_vpickod v8i16:$vj, v8i16:$vk), + (VPICKOD_H v8i16:$vj, v8i16:$vk)>; +def : Pat<(loongarch_vpickod v4i32:$vj, v4i32:$vk), + (VPICKOD_W v4i32:$vj, v4i32:$vk)>; +def : Pat<(loongarch_vpickod v2i64:$vj, v2i64:$vk), + (VPICKOD_D v2i64:$vj, v2i64:$vk)>; +def : Pat<(loongarch_vpickod v4f32:$vj, v4f32:$vk), + (VPICKOD_W v4f32:$vj, v4f32:$vk)>; +def : Pat<(loongarch_vpickod v2f64:$vj, v2f64:$vk), + (VPICKOD_D v2f64:$vj, v2f64:$vk)>; + +// VPACKEV_{B/H/W/D} +def : Pat<(loongarch_vpackev v16i8:$vj, v16i8:$vk), + (VPACKEV_B v16i8:$vj, v16i8:$vk)>; +def : Pat<(loongarch_vpackev v8i16:$vj, v8i16:$vk), + (VPACKEV_H v8i16:$vj, v8i16:$vk)>; +def : Pat<(loongarch_vpackev v4i32:$vj, v4i32:$vk), + (VPACKEV_W v4i32:$vj, v4i32:$vk)>; +def : Pat<(loongarch_vpackev v2i64:$vj, v2i64:$vk), + (VPACKEV_D v2i64:$vj, v2i64:$vk)>; +def : Pat<(loongarch_vpackev v4f32:$vj, v4f32:$vk), + (VPACKEV_W v4f32:$vj, v4f32:$vk)>; +def : Pat<(loongarch_vpackev v2f64:$vj, v2f64:$vk), + (VPACKEV_D v2f64:$vj, v2f64:$vk)>; + +// VPACKOD_{B/H/W/D} +def : Pat<(loongarch_vpackod v16i8:$vj, v16i8:$vk), + (VPACKOD_B v16i8:$vj, v16i8:$vk)>; +def : Pat<(loongarch_vpackod v8i16:$vj, v8i16:$vk), + (VPACKOD_H v8i16:$vj, v8i16:$vk)>; +def : 
Pat<(loongarch_vpackod v4i32:$vj, v4i32:$vk), + (VPACKOD_W v4i32:$vj, v4i32:$vk)>; +def : Pat<(loongarch_vpackod v2i64:$vj, v2i64:$vk), + (VPACKOD_D v2i64:$vj, v2i64:$vk)>; +def : Pat<(loongarch_vpackod v4f32:$vj, v4f32:$vk), + (VPACKOD_W v4f32:$vj, v4f32:$vk)>; +def : Pat<(loongarch_vpackod v2f64:$vj, v2f64:$vk), + (VPACKOD_D v2f64:$vj, v2f64:$vk)>; + +// VILVL_{B/H/W/D} +def : Pat<(loongarch_vilvl v16i8:$vj, v16i8:$vk), + (VILVL_B v16i8:$vj, v16i8:$vk)>; +def : Pat<(loongarch_vilvl v8i16:$vj, v8i16:$vk), + (VILVL_H v8i16:$vj, v8i16:$vk)>; +def : Pat<(loongarch_vilvl v4i32:$vj, v4i32:$vk), + (VILVL_W v4i32:$vj, v4i32:$vk)>; +def : Pat<(loongarch_vilvl v2i64:$vj, v2i64:$vk), + (VILVL_D v2i64:$vj, v2i64:$vk)>; +def : Pat<(loongarch_vilvl v4f32:$vj, v4f32:$vk), + (VILVL_W v4f32:$vj, v4f32:$vk)>; +def : Pat<(loongarch_vilvl v2f64:$vj, v2f64:$vk), + (VILVL_D v2f64:$vj, v2f64:$vk)>; + +// VILVH_{B/H/W/D} +def : Pat<(loongarch_vilvh v16i8:$vj, v16i8:$vk), + (VILVH_B v16i8:$vj, v16i8:$vk)>; +def : Pat<(loongarch_vilvh v8i16:$vj, v8i16:$vk), + (VILVH_H v8i16:$vj, v8i16:$vk)>; +def : Pat<(loongarch_vilvh v4i32:$vj, v4i32:$vk), + (VILVH_W v4i32:$vj, v4i32:$vk)>; +def : Pat<(loongarch_vilvh v2i64:$vj, v2i64:$vk), + (VILVH_D v2i64:$vj, v2i64:$vk)>; +def : Pat<(loongarch_vilvh v4f32:$vj, v4f32:$vk), + (VILVH_W v4f32:$vj, v4f32:$vk)>; +def : Pat<(loongarch_vilvh v2f64:$vj, v2f64:$vk), + (VILVH_D v2f64:$vj, v2f64:$vk)>; + +// VSHUF4I_{B/H/W} +def : Pat<(loongarch_vshuf4i v16i8:$vj, immZExt8:$ui8), + (VSHUF4I_B v16i8:$vj, immZExt8:$ui8)>; +def : Pat<(loongarch_vshuf4i v8i16:$vj, immZExt8:$ui8), + (VSHUF4I_H v8i16:$vj, immZExt8:$ui8)>; +def : Pat<(loongarch_vshuf4i v4i32:$vj, immZExt8:$ui8), + (VSHUF4I_W v4i32:$vj, immZExt8:$ui8)>; +def : Pat<(loongarch_vshuf4i v4f32:$vj, immZExt8:$ui8), + (VSHUF4I_W v4f32:$vj, immZExt8:$ui8)>; + +// VREPLVEI_{B/H/W/D} +def : Pat<(loongarch_vreplvei v16i8:$vj, immZExt4:$ui4), + (VREPLVEI_B v16i8:$vj, immZExt4:$ui4)>; +def : Pat<(loongarch_vreplvei v8i16:$vj, immZExt3:$ui3), + (VREPLVEI_H v8i16:$vj, immZExt3:$ui3)>; +def : Pat<(loongarch_vreplvei v4i32:$vj, immZExt2:$ui2), + (VREPLVEI_W v4i32:$vj, immZExt2:$ui2)>; +def : Pat<(loongarch_vreplvei v2i64:$vj, immZExt1:$ui1), + (VREPLVEI_D v2i64:$vj, immZExt1:$ui1)>; +def : Pat<(loongarch_vreplvei v4f32:$vj, immZExt2:$ui2), + (VREPLVEI_W v4f32:$vj, immZExt2:$ui2)>; +def : Pat<(loongarch_vreplvei v2f64:$vj, immZExt1:$ui1), + (VREPLVEI_D v2f64:$vj, immZExt1:$ui1)>; + +// VREPLVEI_{W/D} +def : Pat<(lsxsplatf32 FPR32:$fj), + (VREPLVEI_W (SUBREG_TO_REG (i64 0), FPR32:$fj, sub_32), 0)>; +def : Pat<(lsxsplatf64 FPR64:$fj), + (VREPLVEI_D (SUBREG_TO_REG (i64 0), FPR64:$fj, sub_64), 0)>; + +// Loads/Stores +foreach vt = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in { + defm : LdPat; + def : RegRegLdPat; + defm : StPat; + def : RegRegStPat; +} + +// Vector extraction with constant index. 
+def : Pat<(i64 (vector_extract v16i8:$vj, uimm4:$imm)), + (VPICKVE2GR_B v16i8:$vj, uimm4:$imm)>; +def : Pat<(i64 (vector_extract v8i16:$vj, uimm3:$imm)), + (VPICKVE2GR_H v8i16:$vj, uimm3:$imm)>; +def : Pat<(i64 (vector_extract v4i32:$vj, uimm2:$imm)), + (VPICKVE2GR_W v4i32:$vj, uimm2:$imm)>; +def : Pat<(i64 (vector_extract v2i64:$vj, uimm1:$imm)), + (VPICKVE2GR_D v2i64:$vj, uimm1:$imm)>; +def : Pat<(f32 (vector_extract v4f32:$vj, uimm2:$imm)), + (f32 (EXTRACT_SUBREG (VREPLVEI_W v4f32:$vj, uimm2:$imm), sub_32))>; +def : Pat<(f64 (vector_extract v2f64:$vj, uimm1:$imm)), + (f64 (EXTRACT_SUBREG (VREPLVEI_D v2f64:$vj, uimm1:$imm), sub_64))>; + +// Vector extraction with variable index. +def : Pat<(i64 (vector_extract v16i8:$vj, i64:$rk)), + (SRAI_W (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG (VREPLVE_B v16i8:$vj, + i64:$rk), + sub_32)), + GPR), (i64 24))>; +def : Pat<(i64 (vector_extract v8i16:$vj, i64:$rk)), + (SRAI_W (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG (VREPLVE_H v8i16:$vj, + i64:$rk), + sub_32)), + GPR), (i64 16))>; +def : Pat<(i64 (vector_extract v4i32:$vj, i64:$rk)), + (COPY_TO_REGCLASS (f32 (EXTRACT_SUBREG (VREPLVE_W v4i32:$vj, i64:$rk), + sub_32)), + GPR)>; +def : Pat<(i64 (vector_extract v2i64:$vj, i64:$rk)), + (COPY_TO_REGCLASS (f64 (EXTRACT_SUBREG (VREPLVE_D v2i64:$vj, i64:$rk), + sub_64)), + GPR)>; +def : Pat<(f32 (vector_extract v4f32:$vj, i64:$rk)), + (f32 (EXTRACT_SUBREG (VREPLVE_W v4f32:$vj, i64:$rk), sub_32))>; +def : Pat<(f64 (vector_extract v2f64:$vj, i64:$rk)), + (f64 (EXTRACT_SUBREG (VREPLVE_D v2f64:$vj, i64:$rk), sub_64))>; + +// vselect +def : Pat<(v16i8 (vselect LSX128:$vd, (v16i8 (SplatPat_uimm8 uimm8:$imm)), + LSX128:$vj)), + (VBITSELI_B LSX128:$vd, LSX128:$vj, uimm8:$imm)>; +foreach vt = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in + def : Pat<(vt (vselect LSX128:$va, LSX128:$vk, LSX128:$vj)), + (VBITSEL_V LSX128:$vj, LSX128:$vk, LSX128:$va)>; + +// fneg +def : Pat<(fneg (v4f32 LSX128:$vj)), (VBITREVI_W LSX128:$vj, 31)>; +def : Pat<(fneg (v2f64 LSX128:$vj)), (VBITREVI_D LSX128:$vj, 63)>; + +// VFFINT_{S_W/D_L} +def : Pat<(v4f32 (sint_to_fp v4i32:$vj)), (VFFINT_S_W v4i32:$vj)>; +def : Pat<(v2f64 (sint_to_fp v2i64:$vj)), (VFFINT_D_L v2i64:$vj)>; + +// VFFINT_{S_WU/D_LU} +def : Pat<(v4f32 (uint_to_fp v4i32:$vj)), (VFFINT_S_WU v4i32:$vj)>; +def : Pat<(v2f64 (uint_to_fp v2i64:$vj)), (VFFINT_D_LU v2i64:$vj)>; + +// VFTINTRZ_{W_S/L_D} +def : Pat<(v4i32 (fp_to_sint v4f32:$vj)), (VFTINTRZ_W_S v4f32:$vj)>; +def : Pat<(v2i64 (fp_to_sint v2f64:$vj)), (VFTINTRZ_L_D v2f64:$vj)>; + +// VFTINTRZ_{W_SU/L_DU} +def : Pat<(v4i32 (fp_to_uint v4f32:$vj)), (VFTINTRZ_WU_S v4f32:$vj)>; +def : Pat<(v2i64 (fp_to_uint v2f64:$vj)), (VFTINTRZ_LU_D v2f64:$vj)>; + +} // Predicates = [HasExtLSX] + +/// Intrinsic pattern + +class deriveLSXIntrinsic { + Intrinsic ret = !cast(!tolower("int_loongarch_lsx_"#Inst)); +} + +let Predicates = [HasExtLSX] in { + +// vty: v16i8/v8i16/v4i32/v2i64 +// Pat<(Intrinsic vty:$vj, vty:$vk), +// (LAInst vty:$vj, vty:$vk)>; +foreach Inst = ["VSADD_B", "VSADD_BU", "VSSUB_B", "VSSUB_BU", + "VHADDW_H_B", "VHADDW_HU_BU", "VHSUBW_H_B", "VHSUBW_HU_BU", + "VADDWEV_H_B", "VADDWOD_H_B", "VSUBWEV_H_B", "VSUBWOD_H_B", + "VADDWEV_H_BU", "VADDWOD_H_BU", "VSUBWEV_H_BU", "VSUBWOD_H_BU", + "VADDWEV_H_BU_B", "VADDWOD_H_BU_B", + "VAVG_B", "VAVG_BU", "VAVGR_B", "VAVGR_BU", + "VABSD_B", "VABSD_BU", "VADDA_B", "VMUH_B", "VMUH_BU", + "VMULWEV_H_B", "VMULWOD_H_B", "VMULWEV_H_BU", "VMULWOD_H_BU", + "VMULWEV_H_BU_B", "VMULWOD_H_BU_B", "VSIGNCOV_B", + "VANDN_V", "VORN_V", "VROTR_B", "VSRLR_B", 
"VSRAR_B", + "VSEQ_B", "VSLE_B", "VSLE_BU", "VSLT_B", "VSLT_BU", + "VPACKEV_B", "VPACKOD_B", "VPICKEV_B", "VPICKOD_B", + "VILVL_B", "VILVH_B"] in + def : Pat<(deriveLSXIntrinsic.ret + (v16i8 LSX128:$vj), (v16i8 LSX128:$vk)), + (!cast(Inst) LSX128:$vj, LSX128:$vk)>; +foreach Inst = ["VSADD_H", "VSADD_HU", "VSSUB_H", "VSSUB_HU", + "VHADDW_W_H", "VHADDW_WU_HU", "VHSUBW_W_H", "VHSUBW_WU_HU", + "VADDWEV_W_H", "VADDWOD_W_H", "VSUBWEV_W_H", "VSUBWOD_W_H", + "VADDWEV_W_HU", "VADDWOD_W_HU", "VSUBWEV_W_HU", "VSUBWOD_W_HU", + "VADDWEV_W_HU_H", "VADDWOD_W_HU_H", + "VAVG_H", "VAVG_HU", "VAVGR_H", "VAVGR_HU", + "VABSD_H", "VABSD_HU", "VADDA_H", "VMUH_H", "VMUH_HU", + "VMULWEV_W_H", "VMULWOD_W_H", "VMULWEV_W_HU", "VMULWOD_W_HU", + "VMULWEV_W_HU_H", "VMULWOD_W_HU_H", "VSIGNCOV_H", "VROTR_H", + "VSRLR_H", "VSRAR_H", "VSRLN_B_H", "VSRAN_B_H", "VSRLRN_B_H", + "VSRARN_B_H", "VSSRLN_B_H", "VSSRAN_B_H", "VSSRLN_BU_H", + "VSSRAN_BU_H", "VSSRLRN_B_H", "VSSRARN_B_H", "VSSRLRN_BU_H", + "VSSRARN_BU_H", + "VSEQ_H", "VSLE_H", "VSLE_HU", "VSLT_H", "VSLT_HU", + "VPACKEV_H", "VPACKOD_H", "VPICKEV_H", "VPICKOD_H", + "VILVL_H", "VILVH_H"] in + def : Pat<(deriveLSXIntrinsic.ret + (v8i16 LSX128:$vj), (v8i16 LSX128:$vk)), + (!cast(Inst) LSX128:$vj, LSX128:$vk)>; +foreach Inst = ["VSADD_W", "VSADD_WU", "VSSUB_W", "VSSUB_WU", + "VHADDW_D_W", "VHADDW_DU_WU", "VHSUBW_D_W", "VHSUBW_DU_WU", + "VADDWEV_D_W", "VADDWOD_D_W", "VSUBWEV_D_W", "VSUBWOD_D_W", + "VADDWEV_D_WU", "VADDWOD_D_WU", "VSUBWEV_D_WU", "VSUBWOD_D_WU", + "VADDWEV_D_WU_W", "VADDWOD_D_WU_W", + "VAVG_W", "VAVG_WU", "VAVGR_W", "VAVGR_WU", + "VABSD_W", "VABSD_WU", "VADDA_W", "VMUH_W", "VMUH_WU", + "VMULWEV_D_W", "VMULWOD_D_W", "VMULWEV_D_WU", "VMULWOD_D_WU", + "VMULWEV_D_WU_W", "VMULWOD_D_WU_W", "VSIGNCOV_W", "VROTR_W", + "VSRLR_W", "VSRAR_W", "VSRLN_H_W", "VSRAN_H_W", "VSRLRN_H_W", + "VSRARN_H_W", "VSSRLN_H_W", "VSSRAN_H_W", "VSSRLN_HU_W", + "VSSRAN_HU_W", "VSSRLRN_H_W", "VSSRARN_H_W", "VSSRLRN_HU_W", + "VSSRARN_HU_W", + "VSEQ_W", "VSLE_W", "VSLE_WU", "VSLT_W", "VSLT_WU", + "VPACKEV_W", "VPACKOD_W", "VPICKEV_W", "VPICKOD_W", + "VILVL_W", "VILVH_W"] in + def : Pat<(deriveLSXIntrinsic.ret + (v4i32 LSX128:$vj), (v4i32 LSX128:$vk)), + (!cast(Inst) LSX128:$vj, LSX128:$vk)>; +foreach Inst = ["VADD_Q", "VSUB_Q", + "VSADD_D", "VSADD_DU", "VSSUB_D", "VSSUB_DU", + "VHADDW_Q_D", "VHADDW_QU_DU", "VHSUBW_Q_D", "VHSUBW_QU_DU", + "VADDWEV_Q_D", "VADDWOD_Q_D", "VSUBWEV_Q_D", "VSUBWOD_Q_D", + "VADDWEV_Q_DU", "VADDWOD_Q_DU", "VSUBWEV_Q_DU", "VSUBWOD_Q_DU", + "VADDWEV_Q_DU_D", "VADDWOD_Q_DU_D", + "VAVG_D", "VAVG_DU", "VAVGR_D", "VAVGR_DU", + "VABSD_D", "VABSD_DU", "VADDA_D", "VMUH_D", "VMUH_DU", + "VMULWEV_Q_D", "VMULWOD_Q_D", "VMULWEV_Q_DU", "VMULWOD_Q_DU", + "VMULWEV_Q_DU_D", "VMULWOD_Q_DU_D", "VSIGNCOV_D", "VROTR_D", + "VSRLR_D", "VSRAR_D", "VSRLN_W_D", "VSRAN_W_D", "VSRLRN_W_D", + "VSRARN_W_D", "VSSRLN_W_D", "VSSRAN_W_D", "VSSRLN_WU_D", + "VSSRAN_WU_D", "VSSRLRN_W_D", "VSSRARN_W_D", "VSSRLRN_WU_D", + "VSSRARN_WU_D", "VFFINT_S_L", + "VSEQ_D", "VSLE_D", "VSLE_DU", "VSLT_D", "VSLT_DU", + "VPACKEV_D", "VPACKOD_D", "VPICKEV_D", "VPICKOD_D", + "VILVL_D", "VILVH_D"] in + def : Pat<(deriveLSXIntrinsic.ret + (v2i64 LSX128:$vj), (v2i64 LSX128:$vk)), + (!cast(Inst) LSX128:$vj, LSX128:$vk)>; + +// vty: v16i8/v8i16/v4i32/v2i64 +// Pat<(Intrinsic vty:$vd, vty:$vj, vty:$vk), +// (LAInst vty:$vd, vty:$vj, vty:$vk)>; +foreach Inst = ["VMADDWEV_H_B", "VMADDWOD_H_B", "VMADDWEV_H_BU", + "VMADDWOD_H_BU", "VMADDWEV_H_BU_B", "VMADDWOD_H_BU_B"] in + def : Pat<(deriveLSXIntrinsic.ret + (v8i16 LSX128:$vd), 
(v16i8 LSX128:$vj), (v16i8 LSX128:$vk)), + (!cast(Inst) LSX128:$vd, LSX128:$vj, LSX128:$vk)>; +foreach Inst = ["VMADDWEV_W_H", "VMADDWOD_W_H", "VMADDWEV_W_HU", + "VMADDWOD_W_HU", "VMADDWEV_W_HU_H", "VMADDWOD_W_HU_H"] in + def : Pat<(deriveLSXIntrinsic.ret + (v4i32 LSX128:$vd), (v8i16 LSX128:$vj), (v8i16 LSX128:$vk)), + (!cast(Inst) LSX128:$vd, LSX128:$vj, LSX128:$vk)>; +foreach Inst = ["VMADDWEV_D_W", "VMADDWOD_D_W", "VMADDWEV_D_WU", + "VMADDWOD_D_WU", "VMADDWEV_D_WU_W", "VMADDWOD_D_WU_W"] in + def : Pat<(deriveLSXIntrinsic.ret + (v2i64 LSX128:$vd), (v4i32 LSX128:$vj), (v4i32 LSX128:$vk)), + (!cast(Inst) LSX128:$vd, LSX128:$vj, LSX128:$vk)>; +foreach Inst = ["VMADDWEV_Q_D", "VMADDWOD_Q_D", "VMADDWEV_Q_DU", + "VMADDWOD_Q_DU", "VMADDWEV_Q_DU_D", "VMADDWOD_Q_DU_D"] in + def : Pat<(deriveLSXIntrinsic.ret + (v2i64 LSX128:$vd), (v2i64 LSX128:$vj), (v2i64 LSX128:$vk)), + (!cast(Inst) LSX128:$vd, LSX128:$vj, LSX128:$vk)>; + +// vty: v16i8/v8i16/v4i32/v2i64 +// Pat<(Intrinsic vty:$vj), +// (LAInst vty:$vj)>; +foreach Inst = ["VEXTH_H_B", "VEXTH_HU_BU", + "VMSKLTZ_B", "VMSKGEZ_B", "VMSKNZ_B", + "VCLO_B"] in + def : Pat<(deriveLSXIntrinsic.ret (v16i8 LSX128:$vj)), + (!cast(Inst) LSX128:$vj)>; +foreach Inst = ["VEXTH_W_H", "VEXTH_WU_HU", "VMSKLTZ_H", + "VCLO_H", "VFCVTL_S_H", "VFCVTH_S_H"] in + def : Pat<(deriveLSXIntrinsic.ret (v8i16 LSX128:$vj)), + (!cast(Inst) LSX128:$vj)>; +foreach Inst = ["VEXTH_D_W", "VEXTH_DU_WU", "VMSKLTZ_W", + "VCLO_W", "VFFINT_S_W", "VFFINT_S_WU", + "VFFINTL_D_W", "VFFINTH_D_W"] in + def : Pat<(deriveLSXIntrinsic.ret (v4i32 LSX128:$vj)), + (!cast(Inst) LSX128:$vj)>; +foreach Inst = ["VEXTH_Q_D", "VEXTH_QU_DU", "VMSKLTZ_D", + "VEXTL_Q_D", "VEXTL_QU_DU", + "VCLO_D", "VFFINT_D_L", "VFFINT_D_LU"] in + def : Pat<(deriveLSXIntrinsic.ret (v2i64 LSX128:$vj)), + (!cast(Inst) LSX128:$vj)>; + +// Pat<(Intrinsic timm:$imm) +// (LAInst timm:$imm)>; +def : Pat<(int_loongarch_lsx_vldi timm:$imm), + (VLDI (to_valid_timm timm:$imm))>; +foreach Inst = ["VREPLI_B", "VREPLI_H", "VREPLI_W", "VREPLI_D"] in + def : Pat<(deriveLSXIntrinsic.ret timm:$imm), + (!cast("Pseudo"#Inst) (to_valid_timm timm:$imm))>; + +// vty: v16i8/v8i16/v4i32/v2i64 +// Pat<(Intrinsic vty:$vj, timm:$imm) +// (LAInst vty:$vj, timm:$imm)>; +foreach Inst = ["VSAT_B", "VSAT_BU", "VNORI_B", "VROTRI_B", "VSLLWIL_H_B", + "VSLLWIL_HU_BU", "VSRLRI_B", "VSRARI_B", + "VSEQI_B", "VSLEI_B", "VSLEI_BU", "VSLTI_B", "VSLTI_BU", + "VREPLVEI_B", "VBSLL_V", "VBSRL_V", "VSHUF4I_B"] in + def : Pat<(deriveLSXIntrinsic.ret (v16i8 LSX128:$vj), timm:$imm), + (!cast(Inst) LSX128:$vj, (to_valid_timm timm:$imm))>; +foreach Inst = ["VSAT_H", "VSAT_HU", "VROTRI_H", "VSLLWIL_W_H", + "VSLLWIL_WU_HU", "VSRLRI_H", "VSRARI_H", + "VSEQI_H", "VSLEI_H", "VSLEI_HU", "VSLTI_H", "VSLTI_HU", + "VREPLVEI_H", "VSHUF4I_H"] in + def : Pat<(deriveLSXIntrinsic.ret (v8i16 LSX128:$vj), timm:$imm), + (!cast(Inst) LSX128:$vj, (to_valid_timm timm:$imm))>; +foreach Inst = ["VSAT_W", "VSAT_WU", "VROTRI_W", "VSLLWIL_D_W", + "VSLLWIL_DU_WU", "VSRLRI_W", "VSRARI_W", + "VSEQI_W", "VSLEI_W", "VSLEI_WU", "VSLTI_W", "VSLTI_WU", + "VREPLVEI_W", "VSHUF4I_W"] in + def : Pat<(deriveLSXIntrinsic.ret (v4i32 LSX128:$vj), timm:$imm), + (!cast(Inst) LSX128:$vj, (to_valid_timm timm:$imm))>; +foreach Inst = ["VSAT_D", "VSAT_DU", "VROTRI_D", "VSRLRI_D", "VSRARI_D", + "VSEQI_D", "VSLEI_D", "VSLEI_DU", "VSLTI_D", "VSLTI_DU", + "VPICKVE2GR_D", "VPICKVE2GR_DU", + "VREPLVEI_D"] in + def : Pat<(deriveLSXIntrinsic.ret (v2i64 LSX128:$vj), timm:$imm), + (!cast(Inst) LSX128:$vj, (to_valid_timm 
timm:$imm))>; + +// vty: v16i8/v8i16/v4i32/v2i64 +// Pat<(Intrinsic vty:$vd, vty:$vj, timm:$imm) +// (LAInst vty:$vd, vty:$vj, timm:$imm)>; +foreach Inst = ["VSRLNI_B_H", "VSRANI_B_H", "VSRLRNI_B_H", "VSRARNI_B_H", + "VSSRLNI_B_H", "VSSRANI_B_H", "VSSRLNI_BU_H", "VSSRANI_BU_H", + "VSSRLRNI_B_H", "VSSRARNI_B_H", "VSSRLRNI_BU_H", "VSSRARNI_BU_H", + "VFRSTPI_B", "VBITSELI_B", "VEXTRINS_B"] in + def : Pat<(deriveLSXIntrinsic.ret + (v16i8 LSX128:$vd), (v16i8 LSX128:$vj), timm:$imm), + (!cast(Inst) LSX128:$vd, LSX128:$vj, + (to_valid_timm timm:$imm))>; +foreach Inst = ["VSRLNI_H_W", "VSRANI_H_W", "VSRLRNI_H_W", "VSRARNI_H_W", + "VSSRLNI_H_W", "VSSRANI_H_W", "VSSRLNI_HU_W", "VSSRANI_HU_W", + "VSSRLRNI_H_W", "VSSRARNI_H_W", "VSSRLRNI_HU_W", "VSSRARNI_HU_W", + "VFRSTPI_H", "VEXTRINS_H"] in + def : Pat<(deriveLSXIntrinsic.ret + (v8i16 LSX128:$vd), (v8i16 LSX128:$vj), timm:$imm), + (!cast(Inst) LSX128:$vd, LSX128:$vj, + (to_valid_timm timm:$imm))>; +foreach Inst = ["VSRLNI_W_D", "VSRANI_W_D", "VSRLRNI_W_D", "VSRARNI_W_D", + "VSSRLNI_W_D", "VSSRANI_W_D", "VSSRLNI_WU_D", "VSSRANI_WU_D", + "VSSRLRNI_W_D", "VSSRARNI_W_D", "VSSRLRNI_WU_D", "VSSRARNI_WU_D", + "VPERMI_W", "VEXTRINS_W"] in + def : Pat<(deriveLSXIntrinsic.ret + (v4i32 LSX128:$vd), (v4i32 LSX128:$vj), timm:$imm), + (!cast(Inst) LSX128:$vd, LSX128:$vj, + (to_valid_timm timm:$imm))>; +foreach Inst = ["VSRLNI_D_Q", "VSRANI_D_Q", "VSRLRNI_D_Q", "VSRARNI_D_Q", + "VSSRLNI_D_Q", "VSSRANI_D_Q", "VSSRLNI_DU_Q", "VSSRANI_DU_Q", + "VSSRLRNI_D_Q", "VSSRARNI_D_Q", "VSSRLRNI_DU_Q", "VSSRARNI_DU_Q", + "VSHUF4I_D", "VEXTRINS_D"] in + def : Pat<(deriveLSXIntrinsic.ret + (v2i64 LSX128:$vd), (v2i64 LSX128:$vj), timm:$imm), + (!cast(Inst) LSX128:$vd, LSX128:$vj, + (to_valid_timm timm:$imm))>; + +// vty: v16i8/v8i16/v4i32/v2i64 +// Pat<(Intrinsic vty:$vd, vty:$vj, vty:$vk), +// (LAInst vty:$vd, vty:$vj, vty:$vk)>; +foreach Inst = ["VFRSTP_B", "VBITSEL_V", "VSHUF_B"] in + def : Pat<(deriveLSXIntrinsic.ret + (v16i8 LSX128:$vd), (v16i8 LSX128:$vj), (v16i8 LSX128:$vk)), + (!cast(Inst) LSX128:$vd, LSX128:$vj, LSX128:$vk)>; +foreach Inst = ["VFRSTP_H", "VSHUF_H"] in + def : Pat<(deriveLSXIntrinsic.ret + (v8i16 LSX128:$vd), (v8i16 LSX128:$vj), (v8i16 LSX128:$vk)), + (!cast(Inst) LSX128:$vd, LSX128:$vj, LSX128:$vk)>; +def : Pat<(int_loongarch_lsx_vshuf_w (v4i32 LSX128:$vd), (v4i32 LSX128:$vj), + (v4i32 LSX128:$vk)), + (VSHUF_W LSX128:$vd, LSX128:$vj, LSX128:$vk)>; +def : Pat<(int_loongarch_lsx_vshuf_d (v2i64 LSX128:$vd), (v2i64 LSX128:$vj), + (v2i64 LSX128:$vk)), + (VSHUF_D LSX128:$vd, LSX128:$vj, LSX128:$vk)>; + +// vty: v4f32/v2f64 +// Pat<(Intrinsic vty:$vj, vty:$vk, vty:$va), +// (LAInst vty:$vj, vty:$vk, vty:$va)>; +foreach Inst = ["VFMSUB_S", "VFNMADD_S", "VFNMSUB_S"] in + def : Pat<(deriveLSXIntrinsic.ret + (v4f32 LSX128:$vj), (v4f32 LSX128:$vk), (v4f32 LSX128:$va)), + (!cast(Inst) LSX128:$vj, LSX128:$vk, LSX128:$va)>; +foreach Inst = ["VFMSUB_D", "VFNMADD_D", "VFNMSUB_D"] in + def : Pat<(deriveLSXIntrinsic.ret + (v2f64 LSX128:$vj), (v2f64 LSX128:$vk), (v2f64 LSX128:$va)), + (!cast(Inst) LSX128:$vj, LSX128:$vk, LSX128:$va)>; + +// vty: v4f32/v2f64 +// Pat<(Intrinsic vty:$vj, vty:$vk), +// (LAInst vty:$vj, vty:$vk)>; +foreach Inst = ["VFMAX_S", "VFMIN_S", "VFMAXA_S", "VFMINA_S", "VFCVT_H_S", + "VFCMP_CAF_S", "VFCMP_CUN_S", "VFCMP_CEQ_S", "VFCMP_CUEQ_S", + "VFCMP_CLT_S", "VFCMP_CULT_S", "VFCMP_CLE_S", "VFCMP_CULE_S", + "VFCMP_CNE_S", "VFCMP_COR_S", "VFCMP_CUNE_S", + "VFCMP_SAF_S", "VFCMP_SUN_S", "VFCMP_SEQ_S", "VFCMP_SUEQ_S", + "VFCMP_SLT_S", "VFCMP_SULT_S", 
"VFCMP_SLE_S", "VFCMP_SULE_S", + "VFCMP_SNE_S", "VFCMP_SOR_S", "VFCMP_SUNE_S"] in + def : Pat<(deriveLSXIntrinsic.ret + (v4f32 LSX128:$vj), (v4f32 LSX128:$vk)), + (!cast(Inst) LSX128:$vj, LSX128:$vk)>; +foreach Inst = ["VFMAX_D", "VFMIN_D", "VFMAXA_D", "VFMINA_D", "VFCVT_S_D", + "VFTINTRNE_W_D", "VFTINTRZ_W_D", "VFTINTRP_W_D", "VFTINTRM_W_D", + "VFTINT_W_D", + "VFCMP_CAF_D", "VFCMP_CUN_D", "VFCMP_CEQ_D", "VFCMP_CUEQ_D", + "VFCMP_CLT_D", "VFCMP_CULT_D", "VFCMP_CLE_D", "VFCMP_CULE_D", + "VFCMP_CNE_D", "VFCMP_COR_D", "VFCMP_CUNE_D", + "VFCMP_SAF_D", "VFCMP_SUN_D", "VFCMP_SEQ_D", "VFCMP_SUEQ_D", + "VFCMP_SLT_D", "VFCMP_SULT_D", "VFCMP_SLE_D", "VFCMP_SULE_D", + "VFCMP_SNE_D", "VFCMP_SOR_D", "VFCMP_SUNE_D"] in + def : Pat<(deriveLSXIntrinsic.ret + (v2f64 LSX128:$vj), (v2f64 LSX128:$vk)), + (!cast(Inst) LSX128:$vj, LSX128:$vk)>; + +// vty: v4f32/v2f64 +// Pat<(Intrinsic vty:$vj), +// (LAInst vty:$vj)>; +foreach Inst = ["VFLOGB_S", "VFCLASS_S", "VFSQRT_S", "VFRECIP_S", "VFRSQRT_S", + "VFRINT_S", "VFCVTL_D_S", "VFCVTH_D_S", + "VFRINTRNE_S", "VFRINTRZ_S", "VFRINTRP_S", "VFRINTRM_S", + "VFTINTRNE_W_S", "VFTINTRZ_W_S", "VFTINTRP_W_S", "VFTINTRM_W_S", + "VFTINT_W_S", "VFTINTRZ_WU_S", "VFTINT_WU_S", + "VFTINTRNEL_L_S", "VFTINTRNEH_L_S", "VFTINTRZL_L_S", + "VFTINTRZH_L_S", "VFTINTRPL_L_S", "VFTINTRPH_L_S", + "VFTINTRML_L_S", "VFTINTRMH_L_S", "VFTINTL_L_S", + "VFTINTH_L_S"] in + def : Pat<(deriveLSXIntrinsic.ret (v4f32 LSX128:$vj)), + (!cast(Inst) LSX128:$vj)>; +foreach Inst = ["VFLOGB_D", "VFCLASS_D", "VFSQRT_D", "VFRECIP_D", "VFRSQRT_D", + "VFRINT_D", + "VFRINTRNE_D", "VFRINTRZ_D", "VFRINTRP_D", "VFRINTRM_D", + "VFTINTRNE_L_D", "VFTINTRZ_L_D", "VFTINTRP_L_D", "VFTINTRM_L_D", + "VFTINT_L_D", "VFTINTRZ_LU_D", "VFTINT_LU_D"] in + def : Pat<(deriveLSXIntrinsic.ret (v2f64 LSX128:$vj)), + (!cast(Inst) LSX128:$vj)>; + +// load +def : Pat<(int_loongarch_lsx_vld GPR:$rj, timm:$imm), + (VLD GPR:$rj, (to_valid_timm timm:$imm))>; +def : Pat<(int_loongarch_lsx_vldx GPR:$rj, GPR:$rk), + (VLDX GPR:$rj, GPR:$rk)>; + +def : Pat<(int_loongarch_lsx_vldrepl_b GPR:$rj, timm:$imm), + (VLDREPL_B GPR:$rj, (to_valid_timm timm:$imm))>; +def : Pat<(int_loongarch_lsx_vldrepl_h GPR:$rj, timm:$imm), + (VLDREPL_H GPR:$rj, (to_valid_timm timm:$imm))>; +def : Pat<(int_loongarch_lsx_vldrepl_w GPR:$rj, timm:$imm), + (VLDREPL_W GPR:$rj, (to_valid_timm timm:$imm))>; +def : Pat<(int_loongarch_lsx_vldrepl_d GPR:$rj, timm:$imm), + (VLDREPL_D GPR:$rj, (to_valid_timm timm:$imm))>; + +// store +def : Pat<(int_loongarch_lsx_vst LSX128:$vd, GPR:$rj, timm:$imm), + (VST LSX128:$vd, GPR:$rj, (to_valid_timm timm:$imm))>; +def : Pat<(int_loongarch_lsx_vstx LSX128:$vd, GPR:$rj, GPR:$rk), + (VSTX LSX128:$vd, GPR:$rj, GPR:$rk)>; + +def : Pat<(int_loongarch_lsx_vstelm_b v16i8:$vd, GPR:$rj, timm:$imm, timm:$idx), + (VSTELM_B v16i8:$vd, GPR:$rj, (to_valid_timm timm:$imm), + (to_valid_timm timm:$idx))>; +def : Pat<(int_loongarch_lsx_vstelm_h v8i16:$vd, GPR:$rj, timm:$imm, timm:$idx), + (VSTELM_H v8i16:$vd, GPR:$rj, (to_valid_timm timm:$imm), + (to_valid_timm timm:$idx))>; +def : Pat<(int_loongarch_lsx_vstelm_w v4i32:$vd, GPR:$rj, timm:$imm, timm:$idx), + (VSTELM_W v4i32:$vd, GPR:$rj, (to_valid_timm timm:$imm), + (to_valid_timm timm:$idx))>; +def : Pat<(int_loongarch_lsx_vstelm_d v2i64:$vd, GPR:$rj, timm:$imm, timm:$idx), + (VSTELM_D v2i64:$vd, GPR:$rj, (to_valid_timm timm:$imm), + (to_valid_timm timm:$idx))>; + +} // Predicates = [HasExtLSX] diff --git a/llvm/lib/Target/LoongArch/LoongArchMCInstLower.cpp b/llvm/lib/Target/LoongArch/LoongArchMCInstLower.cpp 
index 64f08e260381279064a9f9ba1dab68487170919e..98ad49f25e3f2fddab1684ede35a93311cee023f 100644
--- a/llvm/lib/Target/LoongArch/LoongArchMCInstLower.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchMCInstLower.cpp
@@ -47,30 +47,57 @@ static MCOperand lowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym,
   case LoongArchII::MO_PCREL_LO:
     Kind = LoongArchMCExpr::VK_LoongArch_PCALA_LO12;
     break;
+  case LoongArchII::MO_PCREL64_LO:
+    Kind = LoongArchMCExpr::VK_LoongArch_PCALA64_LO20;
+    break;
+  case LoongArchII::MO_PCREL64_HI:
+    Kind = LoongArchMCExpr::VK_LoongArch_PCALA64_HI12;
+    break;
   case LoongArchII::MO_GOT_PC_HI:
     Kind = LoongArchMCExpr::VK_LoongArch_GOT_PC_HI20;
     break;
   case LoongArchII::MO_GOT_PC_LO:
     Kind = LoongArchMCExpr::VK_LoongArch_GOT_PC_LO12;
     break;
+  case LoongArchII::MO_GOT_PC64_LO:
+    Kind = LoongArchMCExpr::VK_LoongArch_GOT64_PC_LO20;
+    break;
+  case LoongArchII::MO_GOT_PC64_HI:
+    Kind = LoongArchMCExpr::VK_LoongArch_GOT64_PC_HI12;
+    break;
   case LoongArchII::MO_LE_HI:
     Kind = LoongArchMCExpr::VK_LoongArch_TLS_LE_HI20;
     break;
   case LoongArchII::MO_LE_LO:
     Kind = LoongArchMCExpr::VK_LoongArch_TLS_LE_LO12;
     break;
+  case LoongArchII::MO_LE64_LO:
+    Kind = LoongArchMCExpr::VK_LoongArch_TLS_LE64_LO20;
+    break;
+  case LoongArchII::MO_LE64_HI:
+    Kind = LoongArchMCExpr::VK_LoongArch_TLS_LE64_HI12;
+    break;
   case LoongArchII::MO_IE_PC_HI:
     Kind = LoongArchMCExpr::VK_LoongArch_TLS_IE_PC_HI20;
     break;
   case LoongArchII::MO_IE_PC_LO:
     Kind = LoongArchMCExpr::VK_LoongArch_TLS_IE_PC_LO12;
     break;
+  case LoongArchII::MO_IE_PC64_LO:
+    Kind = LoongArchMCExpr::VK_LoongArch_TLS_IE64_PC_LO20;
+    break;
+  case LoongArchII::MO_IE_PC64_HI:
+    Kind = LoongArchMCExpr::VK_LoongArch_TLS_IE64_PC_HI12;
+    break;
   case LoongArchII::MO_LD_PC_HI:
     Kind = LoongArchMCExpr::VK_LoongArch_TLS_LD_PC_HI20;
     break;
   case LoongArchII::MO_GD_PC_HI:
     Kind = LoongArchMCExpr::VK_LoongArch_TLS_GD_PC_HI20;
     break;
+  case LoongArchII::MO_CALL36:
+    Kind = LoongArchMCExpr::VK_LoongArch_CALL36;
+    break;
     // TODO: Handle more target-flags.
   }
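For orientation, a minimal sketch of how the variant kind selected above is consumed at the tail of lowerSymbolOperand (shape assumed from the upstream function; not code carried by this patch):

  static MCOperand wrapSymbol(MCSymbol *Sym, LoongArchMCExpr::VariantKind Kind,
                              MCContext &Ctx) {
    const MCExpr *ME = MCSymbolRefExpr::create(Sym, Ctx);
    // A flag such as MO_CALL36 surfaces as a %call36(sym) operand modifier
    // once wrapped in the target-specific MCExpr.
    if (Kind != LoongArchMCExpr::VK_LoongArch_None)
      ME = LoongArchMCExpr::create(ME, Kind, Ctx);
    return MCOperand::createExpr(ME);
  }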
diff --git a/llvm/lib/Target/LoongArch/LoongArchMachineFunctionInfo.h b/llvm/lib/Target/LoongArch/LoongArchMachineFunctionInfo.h
index 47b021e2f78fa80fd61c42bc24f025cdbfcd71f1..fb6fb10f78a6514f22380c0cc0abc2dc45cfe3b0 100644
--- a/llvm/lib/Target/LoongArch/LoongArchMachineFunctionInfo.h
+++ b/llvm/lib/Target/LoongArch/LoongArchMachineFunctionInfo.h
@@ -36,6 +36,8 @@ private:
   /// insertIndirectBranch.
   int BranchRelaxationSpillFrameIndex = -1;
 
+  int FPOffsetAdjustment = 0; // OHOS_LOCAL
+
 public:
   LoongArchMachineFunctionInfo(const MachineFunction &MF) {}
 
@@ -61,6 +63,11 @@ public:
   void setBranchRelaxationSpillFrameIndex(int Index) {
     BranchRelaxationSpillFrameIndex = Index;
   }
+
+  // OHOS_LOCAL begin
+  int getFPOffsetAdjustment() const { return FPOffsetAdjustment; }
+  void setFPOffsetAdjustment(int Adj) { FPOffsetAdjustment = Adj; }
+  // OHOS_LOCAL end
 };
 
 } // end namespace llvm
diff --git a/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp
index 2b667d9045e367f79c39dab4245a405b119b7eae..48b7f554d93b9232e8d4b8903899bd418a0ebed5 100644
--- a/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp
@@ -98,6 +98,19 @@ LoongArchRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
   if (TFI->hasBP(MF))
     markSuperRegs(Reserved, LoongArchABI::getBPReg()); // bp
 
+// OHOS_LOCAL begin
+#ifdef ARK_GC_SUPPORT
+  if (MF.getFunction().getCallingConv() == CallingConv::GHC) {
+    markSuperRegs(Reserved, LoongArch::R22);
+    markSuperRegs(Reserved, LoongArch::R1);
+  }
+  if ((MF.getFunction().getCallingConv() == CallingConv::WebKit_JS) ||
+      (MF.getFunction().getCallingConv() == CallingConv::C)) {
+    markSuperRegs(Reserved, LoongArch::R1);
+  }
+#endif
+  // OHOS_LOCAL end
+
   // FIXME: To avoid generating COPY instructions between CFRs, only use $fcc0.
   // This is required to work around the fact that COPY instruction between CFRs
   // is not provided in LoongArch.
diff --git a/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.td b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.td
index 4f0e0697667c0999ca3d40fcf9fdb3402327a8fb..e16eed5e2e07a3aacf0af34c06193394c5640e96 100644
--- a/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.td
@@ -17,6 +17,13 @@ class LoongArchReg<bits<5> Enc, string n, list<string> alt = []>
   let AltNames = alt;
 }
 
+class LoongArchRegWithSubRegs<bits<5> Enc, string n, list<Register> subregs,
+                              list<string> alt = []>
+    : RegisterWithSubRegs<n, subregs> {
+  let HWEncoding = Enc;
+  let AltNames = alt;
+}
+
 class LoongArchReg32<bits<5> Enc, string n, list<string> alt = []>
     : Register<n> {
   let HWEncoding = Enc;
@@ -25,12 +32,21 @@ class LoongArchReg32<bits<5> Enc, string n, list<string> alt = []>
 def sub_32 : SubRegIndex<32>;
 class LoongArchReg64<LoongArchReg32 subreg>
-    : Register<""> {
-  let HWEncoding = subreg.HWEncoding;
-  let SubRegs = [subreg];
+    : LoongArchRegWithSubRegs<subreg.HWEncoding, subreg.AsmName, [subreg],
+                              subreg.AltNames> {
   let SubRegIndices = [sub_32];
-  let AsmName = subreg.AsmName;
-  let AltNames = subreg.AltNames;
+}
+
+def sub_64 : SubRegIndex<64>;
+class LoongArchReg128<LoongArchReg64 subreg, string n>
+    : LoongArchRegWithSubRegs<subreg.HWEncoding, n, [subreg], [n]> {
+  let SubRegIndices = [sub_64];
+}
+
+def sub_128 : SubRegIndex<128>;
+class LoongArchReg256<LoongArchReg128 subreg, string n>
+    : LoongArchRegWithSubRegs<subreg.HWEncoding, n, [subreg], [n]> {
+  let SubRegIndices = [sub_128];
 }
 
 let FallbackRegAltNameIndex = NoRegAltName in
@@ -169,3 +185,23 @@ def FCSR#I : LoongArchReg<I, "fcsr"#I>;
 
 let isAllocatable = false in
 def FCSR : RegisterClass<"LoongArch", [i32], 32, (sequence "FCSR%u", 0, 3)>;
+
+// LSX registers
+
+foreach I = 0-31 in
+def VR#I : LoongArchReg128<!cast<LoongArchReg64>("F"#I#"_64"), "vr"#I>,
+           DwarfRegAlias<!cast<LoongArchReg64>("F"#I#"_64")>;
+
+def LSX128 : RegisterClass<"LoongArch",
+                           [v4f32, v2f64, v16i8, v8i16, v4i32, v2i64],
+                           128, (sequence "VR%u", 0, 31)>;
+
+// LASX registers
+
+foreach I = 0-31 in
+def XR#I : LoongArchReg256<!cast<LoongArchReg128>("VR"#I), "xr"#I>,
+           DwarfRegAlias<!cast<LoongArchReg128>("VR"#I)>;
+
+def LASX256 : RegisterClass<"LoongArch",
+                            [v8f32, v4f64, v32i8, v16i16, v8i32, v4i64],
+                            256, (sequence "XR%u", 0, 31)>;
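The net effect of these register file changes is a three-level sub-register tower: each F*_64 FPR is the low half of a VR* LSX register, which in turn is the low half of an XR* LASX register. A hedged sketch of how the generated tables can be queried (register and index names from the definitions above; the checking helper itself is hypothetical):

  #include "llvm/CodeGen/TargetRegisterInfo.h"
  #include <cassert>

  static void checkSubRegTower(const TargetRegisterInfo &TRI) {
    // VR0 contains F0_64 as its 64-bit sub-register ...
    assert(TRI.getSubReg(LoongArch::VR0, LoongArch::sub_64) == LoongArch::F0_64);
    // ... and XR0 contains VR0 as its 128-bit sub-register.
    assert(TRI.getSubReg(LoongArch::XR0, LoongArch::sub_128) == LoongArch::VR0);
  }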
diff --git a/llvm/lib/Target/LoongArch/LoongArchSubtarget.h b/llvm/lib/Target/LoongArch/LoongArchSubtarget.h
index 4ff42e3b13ea3c28d460bd0b592c3480e264bcee..ca430691345fe15308c611bc0ea10401696bc3b8 100644
--- a/llvm/lib/Target/LoongArch/LoongArchSubtarget.h
+++ b/llvm/lib/Target/LoongArch/LoongArchSubtarget.h
@@ -43,6 +43,7 @@ class LoongArchSubtarget : public LoongArchGenSubtargetInfo {
   bool HasLaGlobalWithAbs = false;
   bool HasLaLocalWithAbs = false;
   bool HasUAL = false;
+  bool HasExpAutoVec = false;
   unsigned GRLen = 32;
   MVT GRLenVT = MVT::i32;
   LoongArchABI::ABI TargetABI = LoongArchABI::ABI_Unknown;
@@ -93,6 +94,7 @@ public:
   bool hasLaGlobalWithAbs() const { return HasLaGlobalWithAbs; }
   bool hasLaLocalWithAbs() const { return HasLaLocalWithAbs; }
   bool hasUAL() const { return HasUAL; }
+  bool hasExpAutoVec() const { return HasExpAutoVec; }
   MVT getGRLenVT() const { return GRLenVT; }
   unsigned getGRLen() const { return GRLen; }
   LoongArchABI::ABI getTargetABI() const { return TargetABI; }
diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp
index 83963bac7c67ac1da99ee7e610ba5a63613313bc..4600c2a2d67336e4f091f881d2d6e3abbd6315dc 100644
--- a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp
@@ -12,12 +12,16 @@
 #include "LoongArchTargetMachine.h"
 #include "LoongArch.h"
+#include "LoongArchMachineFunctionInfo.h"
+#include "LoongArchTargetTransformInfo.h"
 #include "MCTargetDesc/LoongArchBaseInfo.h"
 #include "TargetInfo/LoongArchTargetInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
 #include "llvm/MC/TargetRegistry.h"
+#include "llvm/Support/CodeGen.h"
 #include <optional>
 
 using namespace llvm;
@@ -44,13 +48,33 @@ static Reloc::Model getEffectiveRelocModel(const Triple &TT,
   return RM.value_or(Reloc::Static);
 }
 
+static CodeModel::Model
+getEffectiveLoongArchCodeModel(const Triple &TT,
+                               Optional<CodeModel::Model> CM) {
+  if (!CM)
+    return CodeModel::Small;
+
+  switch (*CM) {
+  case CodeModel::Small:
+    return *CM;
+  case CodeModel::Medium:
+  case CodeModel::Large:
+    if (!TT.isArch64Bit())
+      report_fatal_error("Medium/Large code model requires LA64");
+    return *CM;
+  default:
+    report_fatal_error(
+        "Only small, medium and large code models are allowed on LoongArch");
+  }
+}
+
 LoongArchTargetMachine::LoongArchTargetMachine(
     const Target &T, const Triple &TT, StringRef CPU, StringRef FS,
     const TargetOptions &Options, Optional<Reloc::Model> RM,
     Optional<CodeModel::Model> CM, CodeGenOpt::Level OL, bool JIT)
     : LLVMTargetMachine(T, computeDataLayout(TT), TT, CPU, FS, Options,
                         getEffectiveRelocModel(TT, RM),
-                        getEffectiveCodeModel(CM, CodeModel::Small), OL),
+                        getEffectiveLoongArchCodeModel(TT, CM), OL),
       TLOF(std::make_unique<TargetLoweringObjectFileELF>()) {
   initAsmInfo();
 }
@@ -128,9 +152,15 @@ bool LoongArchPassConfig::addInstSelector() {
   return false;
 }
 
+TargetTransformInfo
+LoongArchTargetMachine::getTargetTransformInfo(const Function &F) const {
+  return TargetTransformInfo(LoongArchTTIImpl(this, F));
+}
+
 void LoongArchPassConfig::addPreEmitPass() { addPass(&BranchRelaxationPassID); }
 
 void LoongArchPassConfig::addPreEmitPass2() {
+  addPass(createLoongArchExpandPseudoPass());
   // Schedule the expansion of AtomicPseudos at the last possible moment,
   // avoiding the possibility for other passes to break the requirements for
   // forward progress in the LL/SC block.
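To make the behaviour of the new code model hook concrete, a small illustration (the helper is file-static, so this is a sketch of its semantics rather than a callable API):

  // Values as in the hunk above; None denotes an absent -mcmodel option.
  Triple TT("loongarch64-unknown-linux-gnu");
  assert(getEffectiveLoongArchCodeModel(TT, CodeModel::Medium) ==
         CodeModel::Medium);                                  // allowed on LA64
  assert(getEffectiveLoongArchCodeModel(TT, None) == CodeModel::Small);
  // On a 32-bit triple, medium/large abort instead:
  //   getEffectiveLoongArchCodeModel(Triple("loongarch32"), CodeModel::Medium)
  //   -> report_fatal_error("Medium/Large code model requires LA64")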
diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.h b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.h index 4cdec7ec9164d03c7db9e48d670fe62aef7cefb5..e426adb5d79befd38d000a422e2192249bff1b3c 100644 --- a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.h +++ b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.h @@ -31,12 +31,18 @@ public: CodeGenOpt::Level OL, bool JIT); ~LoongArchTargetMachine() override; + TargetTransformInfo getTargetTransformInfo(const Function &F) const override; const LoongArchSubtarget *getSubtargetImpl(const Function &F) const override; const LoongArchSubtarget *getSubtargetImpl() const = delete; // Pass Pipeline Configuration TargetPassConfig *createPassConfig(PassManagerBase &PM) override; + // Addrspacecasts are always noops. + bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override { + return true; + } + TargetLoweringObjectFile *getObjFileLowering() const override { return TLOF.get(); } diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp new file mode 100644 index 0000000000000000000000000000000000000000..5f53251484a656732c9da468be86c031d89cf187 --- /dev/null +++ b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp @@ -0,0 +1,43 @@ +//===-- LoongArchTargetTransformInfo.cpp - LoongArch specific TTI ---------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// This file implements a TargetTransformInfo analysis pass specific to the +/// LoongArch target machine. It uses the target's detailed information to +/// provide more precise answers to certain TTI queries, while letting the +/// target independent and default TTI implementations handle the rest. +/// +//===----------------------------------------------------------------------===// + +#include "LoongArchTargetTransformInfo.h" + +using namespace llvm; + +#define DEBUG_TYPE "loongarchtti" + +TypeSize LoongArchTTIImpl::getRegisterBitWidth( + TargetTransformInfo::RegisterKind K) const { + TypeSize DefSize = TargetTransformInfoImplBase::getRegisterBitWidth(K); + switch (K) { + case TargetTransformInfo::RGK_Scalar: + return TypeSize::getFixed(ST->is64Bit() ? 64 : 32); + case TargetTransformInfo::RGK_FixedWidthVector: + if (!ST->hasExpAutoVec()) + return DefSize; + if (ST->hasExtLASX()) + return TypeSize::getFixed(256); + if (ST->hasExtLSX()) + return TypeSize::getFixed(128); + LLVM_FALLTHROUGH; + case TargetTransformInfo::RGK_ScalableVector: + return DefSize; + } + + llvm_unreachable("Unsupported register kind"); +} + +// TODO: Implement more hooks to provide TTI machinery for LoongArch. diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h new file mode 100644 index 0000000000000000000000000000000000000000..d296c9ed576fbd397db6d9b066bf0540dee2491f --- /dev/null +++ b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h @@ -0,0 +1,49 @@ +//===- LoongArchTargetTransformInfo.h - LoongArch specific TTI --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// This file defines a TargetTransformInfo::Concept conforming object specific to the +/// LoongArch target machine. It uses the target's detailed information to +/// provide more precise answers to certain TTI queries, while letting the +/// target independent and default TTI implementations handle the rest. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCHTARGETTRANSFORMINFO_H +#define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHTARGETTRANSFORMINFO_H + +#include "LoongArchSubtarget.h" +#include "LoongArchTargetMachine.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/CodeGen/BasicTTIImpl.h" + +namespace llvm { + +class LoongArchTTIImpl : public BasicTTIImplBase<LoongArchTTIImpl> { + typedef BasicTTIImplBase<LoongArchTTIImpl> BaseT; + typedef TargetTransformInfo TTI; + friend BaseT; + + const LoongArchSubtarget *ST; + const LoongArchTargetLowering *TLI; + + const LoongArchSubtarget *getST() const { return ST; } + const LoongArchTargetLowering *getTLI() const { return TLI; } + +public: + explicit LoongArchTTIImpl(const LoongArchTargetMachine *TM, const Function &F) + : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)), + TLI(ST->getTargetLowering()) {} + + TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const; + + // TODO: Implement more hooks to provide TTI machinery for LoongArch. +}; + +} // end namespace llvm + +#endif // LLVM_LIB_TARGET_LOONGARCH_LOONGARCHTARGETTRANSFORMINFO_H diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h index cdbd1f5699dfd70dfa6bb6f4bddad130b3b5fd40..0ab6704adc554d0369536f74d644f76a992fca86 100644 --- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h @@ -31,14 +31,23 @@ enum { MO_CALL_PLT, MO_PCREL_HI, MO_PCREL_LO, + MO_PCREL64_LO, + MO_PCREL64_HI, MO_GOT_PC_HI, MO_GOT_PC_LO, + MO_GOT_PC64_LO, + MO_GOT_PC64_HI, MO_LE_HI, MO_LE_LO, + MO_LE64_LO, + MO_LE64_HI, MO_IE_PC_HI, MO_IE_PC_LO, + MO_IE_PC64_LO, + MO_IE_PC64_HI, MO_LD_PC_HI, MO_GD_PC_HI, + MO_CALL36 // TODO: Add more flags. }; } // end namespace LoongArchII diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp index a6b9c0652639fbcc7c95e7ab70ff8990957e2f9d..e71c2d3e8321ce246f520f69bd7a8b02406a23ee 100644 --- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp @@ -82,6 +82,8 @@ unsigned LoongArchELFObjectWriter::getRelocType(MCContext &Ctx, return ELF::R_LARCH_TLS_LE64_LO20; case LoongArch::fixup_loongarch_tls_le64_hi12: return ELF::R_LARCH_TLS_LE64_HI12; + case LoongArch::fixup_loongarch_call36: + return ELF::R_LARCH_CALL36; // TODO: Handle more fixup-kinds.
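+  // R_LARCH_CALL36 covers the pcaddu18i+jirl pair as a single unit: the
+  // linker splits the PC-relative offset between pcaddu18i's si20 operand
+  // (implicitly shifted left by 18) and jirl's si16 offset (shifted left by
+  // 2), giving a roughly +/-128GiB call range.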
} } diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchFixupKinds.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchFixupKinds.h index ba2d6718cdf9a27ec3bf587a495fbf7009c792c8..12ff4406f504342debc16b395ab920921d3cf748 100644 --- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchFixupKinds.h +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchFixupKinds.h @@ -106,7 +106,10 @@ enum Fixups { // 20-bit fixup corresponding to %gd_pc_hi20(foo) for instruction pcalau12i. fixup_loongarch_tls_gd_pc_hi20, // 20-bit fixup corresponding to %gd_hi20(foo) for instruction lu12i.w. - fixup_loongarch_tls_gd_hi20 + fixup_loongarch_tls_gd_hi20, + // 36-bit fixup corresponding to %call36(foo) for a pair of instructions: + // pcaddu18i+jirl. + fixup_loongarch_call36 = FirstLiteralRelocationKind + ELF::R_LARCH_CALL36, }; } // end namespace LoongArch } // end namespace llvm diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp index 4587d59087f3220f6a42f4b8deb5959f19c0206b..1defd88643a63ec8b42d934d8a86978bbd61e1e6 100644 --- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp @@ -43,6 +43,11 @@ public: SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const override; + template <unsigned Opc> + void expandToVectorLDI(const MCInst &MI, raw_ostream &OS, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; + /// TableGen'erated function for getting the binary encoding for an /// instruction. uint64_t getBinaryCodeForInstr(const MCInst &MI, @@ -65,12 +70,21 @@ public: /// Return binary encoding of an immediate operand specified by OpNo. /// The value returned is the value of the immediate shifted right - // arithmetically by 2. + // arithmetically by N. /// Note that this function is dedicated to specific immediate types, /// e.g. simm14_lsl2, simm16_lsl2, simm21_lsl2 and simm26_lsl2.
- unsigned getImmOpValueAsr2(const MCInst &MI, unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const; + template <unsigned N> + unsigned getImmOpValueAsr(const MCInst &MI, unsigned OpNo, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { + const MCOperand &MO = MI.getOperand(OpNo); + if (MO.isImm()) { + unsigned Res = MI.getOperand(OpNo).getImm(); + assert((Res & ((1U << N) - 1U)) == 0 && "lowest N bits are non-zero"); + return Res >> N; + } + return getExprOpValue(MI, MO, Fixups, STI); + } unsigned getExprOpValue(const MCInst &MI, const MCOperand &MO, SmallVectorImpl<MCFixup> &Fixups, @@ -101,21 +115,6 @@ LoongArchMCCodeEmitter::getImmOpValueSub1(const MCInst &MI, unsigned OpNo, return MI.getOperand(OpNo).getImm() - 1; } -unsigned -LoongArchMCCodeEmitter::getImmOpValueAsr2(const MCInst &MI, unsigned OpNo, - SmallVectorImpl<MCFixup> &Fixups, - const MCSubtargetInfo &STI) const { - const MCOperand &MO = MI.getOperand(OpNo); - - if (MO.isImm()) { - unsigned Res = MI.getOperand(OpNo).getImm(); - assert((Res & 3) == 0 && "lowest 2 bits are non-zero"); - return Res >> 2; - } - - return getExprOpValue(MI, MO, Fixups, STI); -} - unsigned LoongArchMCCodeEmitter::getExprOpValue(const MCInst &MI, const MCOperand &MO, SmallVectorImpl<MCFixup> &Fixups, @@ -238,6 +237,9 @@ LoongArchMCCodeEmitter::getExprOpValue(const MCInst &MI, const MCOperand &MO, case LoongArchMCExpr::VK_LoongArch_TLS_GD_HI20: FixupKind = LoongArch::fixup_loongarch_tls_gd_hi20; break; + case LoongArchMCExpr::VK_LoongArch_CALL36: + FixupKind = LoongArch::fixup_loongarch_call36; + break; } } else if (Kind == MCExpr::SymbolRef && cast<MCSymbolRefExpr>(Expr)->getKind() == @@ -260,6 +262,7 @@ LoongArchMCCodeEmitter::getExprOpValue(const MCInst &MI, const MCOperand &MO, FixupKind = LoongArch::fixup_loongarch_b21; break; case LoongArch::B: + case LoongArch::BL: FixupKind = LoongArch::fixup_loongarch_b26; break; } @@ -273,6 +276,33 @@ LoongArchMCCodeEmitter::getExprOpValue(const MCInst &MI, const MCOperand &MO, return 0; } +template <unsigned Opc> +void LoongArchMCCodeEmitter::expandToVectorLDI( + const MCInst &MI, raw_ostream &OS, + SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const { + int64_t Imm = MI.getOperand(1).getImm() & 0x3FF; + switch (MI.getOpcode()) { + case LoongArch::PseudoVREPLI_B: + case LoongArch::PseudoXVREPLI_B: + break; + case LoongArch::PseudoVREPLI_H: + case LoongArch::PseudoXVREPLI_H: + Imm |= 0x400; + break; + case LoongArch::PseudoVREPLI_W: + case LoongArch::PseudoXVREPLI_W: + Imm |= 0x800; + break; + case LoongArch::PseudoVREPLI_D: + case LoongArch::PseudoXVREPLI_D: + Imm |= 0xC00; + break; + } + MCInst TmpInst = MCInstBuilder(Opc).addOperand(MI.getOperand(0)).addImm(Imm); + uint32_t Binary = getBinaryCodeForInstr(TmpInst, Fixups, STI); + support::endian::write(OS, Binary, support::little); +} + void LoongArchMCCodeEmitter::encodeInstruction( const MCInst &MI, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const { @@ -280,6 +310,21 @@ void LoongArchMCCodeEmitter::encodeInstruction( // Get byte count of instruction.
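+  // The switch below diverts PseudoVREPLI_*/PseudoXVREPLI_* to
+  // expandToVectorLDI before ordinary encoding: the (x)vldi immediate is 13
+  // bits, with the pseudo's 10-bit value in the low bits and 0x400/0x800/0xC00
+  // ORed in to select replication as 16-, 32- or 64-bit elements (0x000 keeps
+  // 8-bit elements).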
unsigned Size = Desc.getSize(); + switch (MI.getOpcode()) { + default: + break; + case LoongArch::PseudoVREPLI_B: + case LoongArch::PseudoVREPLI_H: + case LoongArch::PseudoVREPLI_W: + case LoongArch::PseudoVREPLI_D: + return expandToVectorLDI<LoongArch::VLDI>(MI, OS, Fixups, STI); + case LoongArch::PseudoXVREPLI_B: + case LoongArch::PseudoXVREPLI_H: + case LoongArch::PseudoXVREPLI_W: + case LoongArch::PseudoXVREPLI_D: + return expandToVectorLDI<LoongArch::XVLDI>(MI, OS, Fixups, STI); + } + switch (Size) { default: llvm_unreachable("Unhandled encodeInstruction length!"); diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.cpp index 993111552a31430cd3111efae969680e6bb7e4aa..3b505f36273726f0f0ac6582807f615175476cf3 100644 --- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.cpp +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.cpp @@ -137,6 +137,8 @@ StringRef LoongArchMCExpr::getVariantKindName(VariantKind Kind) { return "gd_pc_hi20"; case VK_LoongArch_TLS_GD_HI20: return "gd_hi20"; + case VK_LoongArch_CALL36: + return "call36"; } } @@ -179,6 +181,7 @@ LoongArchMCExpr::getVariantKindForName(StringRef name) { .Case("ld_hi20", VK_LoongArch_TLS_LD_HI20) .Case("gd_pc_hi20", VK_LoongArch_TLS_GD_PC_HI20) .Case("gd_hi20", VK_LoongArch_TLS_GD_HI20) + .Case("call36", VK_LoongArch_CALL36) .Default(VK_LoongArch_Invalid); } diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.h index 0945cf82db865cdaa3c2edc412d0eb3c96e8ba39..10449dbd4da473b97a27e039259207959bb8a405 100644 --- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.h +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCExpr.h @@ -61,6 +61,7 @@ public: VK_LoongArch_TLS_LD_HI20, VK_LoongArch_TLS_GD_PC_HI20, VK_LoongArch_TLS_GD_HI20, + VK_LoongArch_CALL36, VK_LoongArch_Invalid // Must be the last item.
}; diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp index 942e667bc2618728e2ddcdcc22ae47d0fed315d9..a4e6a09863e6a68746e72453237365664e8b354c 100644 --- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp @@ -97,13 +97,90 @@ public: bool evaluateBranch(const MCInst &Inst, uint64_t Addr, uint64_t Size, uint64_t &Target) const override { unsigned NumOps = Inst.getNumOperands(); - if (isBranch(Inst) || Inst.getOpcode() == LoongArch::BL) { + if ((isBranch(Inst) && !isIndirectBranch(Inst)) || + Inst.getOpcode() == LoongArch::BL) { Target = Addr + Inst.getOperand(NumOps - 1).getImm(); return true; } return false; } + + bool isTerminator(const MCInst &Inst) const override { + if (MCInstrAnalysis::isTerminator(Inst)) + return true; + + switch (Inst.getOpcode()) { + default: + return false; + case LoongArch::JIRL: + return Inst.getOperand(0).getReg() == LoongArch::R0; + } + } + + bool isCall(const MCInst &Inst) const override { + if (MCInstrAnalysis::isCall(Inst)) + return true; + + switch (Inst.getOpcode()) { + default: + return false; + case LoongArch::JIRL: + return Inst.getOperand(0).getReg() != LoongArch::R0; + } + } + + bool isReturn(const MCInst &Inst) const override { + if (MCInstrAnalysis::isReturn(Inst)) + return true; + + switch (Inst.getOpcode()) { + default: + return false; + case LoongArch::JIRL: + return Inst.getOperand(0).getReg() == LoongArch::R0 && + Inst.getOperand(1).getReg() == LoongArch::R1; + } + } + + bool isBranch(const MCInst &Inst) const override { + if (MCInstrAnalysis::isBranch(Inst)) + return true; + + switch (Inst.getOpcode()) { + default: + return false; + case LoongArch::JIRL: + return Inst.getOperand(0).getReg() == LoongArch::R0 && + Inst.getOperand(1).getReg() != LoongArch::R1; + } + } + + bool isUnconditionalBranch(const MCInst &Inst) const override { + if (MCInstrAnalysis::isUnconditionalBranch(Inst)) + return true; + + switch (Inst.getOpcode()) { + default: + return false; + case LoongArch::JIRL: + return Inst.getOperand(0).getReg() == LoongArch::R0 && + Inst.getOperand(1).getReg() != LoongArch::R1; + } + } + + bool isIndirectBranch(const MCInst &Inst) const override { + if (MCInstrAnalysis::isIndirectBranch(Inst)) + return true; + + switch (Inst.getOpcode()) { + default: + return false; + case LoongArch::JIRL: + return Inst.getOperand(0).getReg() == LoongArch::R0 && + Inst.getOperand(1).getReg() != LoongArch::R1; + } + } }; } // end namespace diff --git a/llvm/test/Assembler/globalvariable-attributes.ll b/llvm/test/Assembler/globalvariable-attributes.ll index 544f9bdb270e99ab533f045c661ed1f3e729d916..4882b447973c08db49bf98ed77872d29cb91e5b8 100644 --- a/llvm/test/Assembler/globalvariable-attributes.ll +++ b/llvm/test/Assembler/globalvariable-attributes.ll @@ -9,6 +9,11 @@ @g7 = global i32 2, sanitize_address_dyninit, align 4 @g8 = global i32 2, sanitize_memtag, align 4 @g9 = global i32 2, no_sanitize_address, no_sanitize_hwaddress, sanitize_memtag, align 4 +@g10 = global i32 2, code_model "tiny" +@g11 = global i32 2, code_model "small" +@g12 = global i32 2, code_model "kernel" +@g13 = global i32 2, code_model "medium" +@g14 = global i32 2, code_model "large" attributes #0 = { "string" = "value" nobuiltin norecurse } @@ -21,6 +26,11 @@ attributes #0 = { "string" = "value" nobuiltin norecurse } ; CHECK: @g7 = global i32 2, sanitize_address_dyninit, align 4 ; CHECK: @g8 = global i32 2, 
sanitize_memtag, align 4 ; CHECK: @g9 = global i32 2, no_sanitize_address, no_sanitize_hwaddress, sanitize_memtag, align 4 +; CHECK: @g10 = global i32 2, code_model "tiny" +; CHECK: @g11 = global i32 2, code_model "small" +; CHECK: @g12 = global i32 2, code_model "kernel" +; CHECK: @g13 = global i32 2, code_model "medium" +; CHECK: @g14 = global i32 2, code_model "large" ; CHECK: attributes #0 = { "key"="value" "key2"="value2" } ; CHECK: attributes #1 = { "key3"="value3" } diff --git a/llvm/test/CodeGen/LoongArch/O0-pipeline.ll b/llvm/test/CodeGen/LoongArch/O0-pipeline.ll index b8d53ea57ff4fa6df428177dfd85af105fac4a07..1482518918f21aa8144d0334dd1c35c238e72ebc 100644 --- a/llvm/test/CodeGen/LoongArch/O0-pipeline.ll +++ b/llvm/test/CodeGen/LoongArch/O0-pipeline.ll @@ -61,6 +61,7 @@ ; CHECK-NEXT: Contiguously Lay Out Funclets ; CHECK-NEXT: StackMap Liveness Analysis ; CHECK-NEXT: Live DEBUG_VALUE analysis +; CHECK-NEXT: LoongArch pseudo instruction expansion pass ; CHECK-NEXT: LoongArch atomic pseudo instruction expansion pass ; CHECK-NEXT: Lazy Machine Block Frequency Analysis ; CHECK-NEXT: Machine Optimization Remark Emitter diff --git a/llvm/test/CodeGen/LoongArch/addrspacecast.ll b/llvm/test/CodeGen/LoongArch/addrspacecast.ll new file mode 100644 index 0000000000000000000000000000000000000000..7875562331be09eea4327c3e360d3636b20058d3 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/addrspacecast.ll @@ -0,0 +1,47 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --verify-machineinstrs < %s | FileCheck %s --check-prefix=LA32 +; RUN: llc --mtriple=loongarch64 --verify-machineinstrs < %s | FileCheck %s --check-prefix=LA64 + +define void @cast0(ptr addrspace(1) %ptr) { +; LA32-LABEL: cast0: +; LA32: # %bb.0: +; LA32-NEXT: st.w $zero, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: cast0: +; LA64: # %bb.0: +; LA64-NEXT: st.w $zero, $a0, 0 +; LA64-NEXT: ret + %ptr0 = addrspacecast ptr addrspace(1) %ptr to ptr addrspace(0) + store i32 0, ptr %ptr0 + ret void +} + +define void @cast1(ptr %ptr) { +; LA32-LABEL: cast1: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: .cfi_def_cfa_offset 16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: .cfi_offset 1, -4 +; LA32-NEXT: bl %plt(foo) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: cast1: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: .cfi_def_cfa_offset 16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: .cfi_offset 1, -8 +; LA64-NEXT: bl %plt(foo) +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret + %castptr = addrspacecast ptr %ptr to ptr addrspace(10) + call void @foo(ptr addrspace(10) %castptr) + ret void +} + +declare void @foo(ptr addrspace(10)) diff --git a/llvm/test/CodeGen/LoongArch/bstrins_w.ll b/llvm/test/CodeGen/LoongArch/bstrins_w.ll index dfbe000841cdcbdf5481ddce19a2d6a6987dfcb6..e008caacad2a17adeea14932cac0249c7cc58ba0 100644 --- a/llvm/test/CodeGen/LoongArch/bstrins_w.ll +++ b/llvm/test/CodeGen/LoongArch/bstrins_w.ll @@ -145,6 +145,19 @@ define i32 @pat5(i32 %a) nounwind { ret i32 %or } +;; The high bits of `const` are zero. 
+define i32 @pat5_high_zeros(i32 %a) nounwind { +; CHECK-LABEL: pat5_high_zeros: +; CHECK: # %bb.0: +; CHECK-NEXT: lu12i.w $a1, 1 +; CHECK-NEXT: ori $a1, $a1, 564 +; CHECK-NEXT: bstrins.w $a0, $a1, 31, 16 +; CHECK-NEXT: ret + %and = and i32 %a, 65535 ; 0x0000ffff + %or = or i32 %and, 305397760 ; 0x12340000 + ret i32 %or +} + ;; Pattern 6: a = b | ((c & mask) << shamt) ;; In this testcase b is 0x10000002, but in fact we do not require b being a ;; constant. As long as all positions in b to be overwritten by the incoming diff --git a/llvm/test/CodeGen/LoongArch/codemodel-medium.ll b/llvm/test/CodeGen/LoongArch/code-models.ll similarity index 54% rename from llvm/test/CodeGen/LoongArch/codemodel-medium.ll rename to llvm/test/CodeGen/LoongArch/code-models.ll index d4d97e7df804ddd059012f613278257651efb4bd..f93c316709284bd1c2db408b46a3c87c5a946f5c 100644 --- a/llvm/test/CodeGen/LoongArch/codemodel-medium.ll +++ b/llvm/test/CodeGen/LoongArch/code-models.ll @@ -3,6 +3,8 @@ ; RUN: FileCheck --check-prefix=SMALL %s ; RUN: llc --mtriple=loongarch64 --code-model=medium < %s | \ ; RUN: FileCheck --check-prefix=MEDIUM %s +; RUN: llc --mtriple=loongarch64 --code-model=large < %s | \ +; RUN: FileCheck --check-prefix=LARGE %s declare void @llvm.memset.p0.i64(ptr, i8, i64, i1) declare i32 @callee(i32) @@ -21,11 +23,25 @@ define i32 @call_globaladdress(i32 %a) nounwind { ; MEDIUM: # %bb.0: ; MEDIUM-NEXT: addi.d $sp, $sp, -16 ; MEDIUM-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill -; MEDIUM-NEXT: pcalau12i $ra, %pc_hi20(callee) -; MEDIUM-NEXT: jirl $ra, $ra, %pc_lo12(callee) +; MEDIUM-NEXT: pcaddu18i $ra, %call36(callee) +; MEDIUM-NEXT: jirl $ra, $ra, 0 ; MEDIUM-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload ; MEDIUM-NEXT: addi.d $sp, $sp, 16 ; MEDIUM-NEXT: ret +; +; LARGE-LABEL: call_globaladdress: +; LARGE: # %bb.0: +; LARGE-NEXT: addi.d $sp, $sp, -16 +; LARGE-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LARGE-NEXT: pcalau12i $ra, %got_pc_hi20(callee) +; LARGE-NEXT: addi.d $t8, $zero, %got_pc_lo12(callee) +; LARGE-NEXT: lu32i.d $t8, %got64_pc_lo20(callee) +; LARGE-NEXT: lu52i.d $t8, $t8, %got64_pc_hi12(callee) +; LARGE-NEXT: ldx.d $ra, $t8, $ra +; LARGE-NEXT: jirl $ra, $ra, 0 +; LARGE-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LARGE-NEXT: addi.d $sp, $sp, 16 +; LARGE-NEXT: ret %1 = call i32 @callee(i32 %a) ret i32 %1 } @@ -52,11 +68,29 @@ define void @call_external_sym(ptr %dst) { ; MEDIUM-NEXT: .cfi_offset 1, -8 ; MEDIUM-NEXT: ori $a2, $zero, 1000 ; MEDIUM-NEXT: move $a1, $zero -; MEDIUM-NEXT: pcalau12i $ra, %pc_hi20(memset) -; MEDIUM-NEXT: jirl $ra, $ra, %pc_lo12(memset) +; MEDIUM-NEXT: pcaddu18i $ra, %call36(memset) +; MEDIUM-NEXT: jirl $ra, $ra, 0 ; MEDIUM-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload ; MEDIUM-NEXT: addi.d $sp, $sp, 16 ; MEDIUM-NEXT: ret +; +; LARGE-LABEL: call_external_sym: +; LARGE: # %bb.0: # %entry +; LARGE-NEXT: addi.d $sp, $sp, -16 +; LARGE-NEXT: .cfi_def_cfa_offset 16 +; LARGE-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LARGE-NEXT: .cfi_offset 1, -8 +; LARGE-NEXT: ori $a2, $zero, 1000 +; LARGE-NEXT: move $a1, $zero +; LARGE-NEXT: pcalau12i $ra, %pc_hi20(memset) +; LARGE-NEXT: addi.d $t8, $zero, %pc_lo12(memset) +; LARGE-NEXT: lu32i.d $t8, %pc64_lo20(memset) +; LARGE-NEXT: lu52i.d $t8, $t8, %pc64_hi12(memset) +; LARGE-NEXT: add.d $ra, $t8, $ra +; LARGE-NEXT: jirl $ra, $ra, 0 +; LARGE-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LARGE-NEXT: addi.d $sp, $sp, 16 +; LARGE-NEXT: ret entry: call void @llvm.memset.p0.i64(ptr %dst, i8 0, i64 1000, i1 false) ret void @@ -71,8 
+105,17 @@ define i32 @caller_tail(i32 %i) nounwind { ; ; MEDIUM-LABEL: caller_tail: ; MEDIUM: # %bb.0: # %entry -; MEDIUM-NEXT: pcalau12i $a1, %pc_hi20(callee_tail) -; MEDIUM-NEXT: jirl $zero, $a1, %pc_lo12(callee_tail) +; MEDIUM-NEXT: pcaddu18i $t8, %call36(callee_tail) +; MEDIUM-NEXT: jr $t8 +; +; LARGE-LABEL: caller_tail: +; LARGE: # %bb.0: # %entry +; LARGE-NEXT: pcalau12i $t7, %got_pc_hi20(callee_tail) +; LARGE-NEXT: addi.d $t8, $zero, %got_pc_lo12(callee_tail) +; LARGE-NEXT: lu32i.d $t8, %got64_pc_lo20(callee_tail) +; LARGE-NEXT: lu52i.d $t8, $t8, %got64_pc_hi12(callee_tail) +; LARGE-NEXT: ldx.d $t7, $t8, $t7 +; LARGE-NEXT: jr $t7 entry: %r = tail call i32 @callee_tail(i32 %i) ret i32 %r diff --git a/llvm/test/CodeGen/LoongArch/double-fma.ll b/llvm/test/CodeGen/LoongArch/double-fma.ll index 6dd62847943375325952d10159ca8468f864ff04..58d20c62a668aec4c0bbfac075aa5166416f38c1 100644 --- a/llvm/test/CodeGen/LoongArch/double-fma.ll +++ b/llvm/test/CodeGen/LoongArch/double-fma.ll @@ -236,13 +236,15 @@ define double @fnmsub_d(double %a, double %b, double %c) nounwind { ; LA32-CONTRACT-ON-LABEL: fnmsub_d: ; LA32-CONTRACT-ON: # %bb.0: ; LA32-CONTRACT-ON-NEXT: fmul.d $fa0, $fa0, $fa1 -; LA32-CONTRACT-ON-NEXT: fsub.d $fa0, $fa2, $fa0 +; LA32-CONTRACT-ON-NEXT: fsub.d $fa0, $fa0, $fa2 +; LA32-CONTRACT-ON-NEXT: fneg.d $fa0, $fa0 ; LA32-CONTRACT-ON-NEXT: ret ; ; LA32-CONTRACT-OFF-LABEL: fnmsub_d: ; LA32-CONTRACT-OFF: # %bb.0: ; LA32-CONTRACT-OFF-NEXT: fmul.d $fa0, $fa0, $fa1 -; LA32-CONTRACT-OFF-NEXT: fsub.d $fa0, $fa2, $fa0 +; LA32-CONTRACT-OFF-NEXT: fsub.d $fa0, $fa0, $fa2 +; LA32-CONTRACT-OFF-NEXT: fneg.d $fa0, $fa0 ; LA32-CONTRACT-OFF-NEXT: ret ; ; LA64-CONTRACT-FAST-LABEL: fnmsub_d: @@ -253,12 +255,98 @@ define double @fnmsub_d(double %a, double %b, double %c) nounwind { ; LA64-CONTRACT-ON-LABEL: fnmsub_d: ; LA64-CONTRACT-ON: # %bb.0: ; LA64-CONTRACT-ON-NEXT: fmul.d $fa0, $fa0, $fa1 -; LA64-CONTRACT-ON-NEXT: fsub.d $fa0, $fa2, $fa0 +; LA64-CONTRACT-ON-NEXT: fsub.d $fa0, $fa0, $fa2 +; LA64-CONTRACT-ON-NEXT: fneg.d $fa0, $fa0 ; LA64-CONTRACT-ON-NEXT: ret ; ; LA64-CONTRACT-OFF-LABEL: fnmsub_d: ; LA64-CONTRACT-OFF: # %bb.0: ; LA64-CONTRACT-OFF-NEXT: fmul.d $fa0, $fa0, $fa1 +; LA64-CONTRACT-OFF-NEXT: fsub.d $fa0, $fa0, $fa2 +; LA64-CONTRACT-OFF-NEXT: fneg.d $fa0, $fa0 +; LA64-CONTRACT-OFF-NEXT: ret + %negc = fneg double %c + %mul = fmul double %a, %b + %add = fadd double %mul, %negc + %neg = fneg double %add + ret double %neg +} + +define double @fnmsub_d_nsz(double %a, double %b, double %c) nounwind { +; LA32-CONTRACT-FAST-LABEL: fnmsub_d_nsz: +; LA32-CONTRACT-FAST: # %bb.0: +; LA32-CONTRACT-FAST-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: fnmsub_d_nsz: +; LA32-CONTRACT-ON: # %bb.0: +; LA32-CONTRACT-ON-NEXT: fmul.d $fa0, $fa0, $fa1 +; LA32-CONTRACT-ON-NEXT: fsub.d $fa0, $fa2, $fa0 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: fnmsub_d_nsz: +; LA32-CONTRACT-OFF: # %bb.0: +; LA32-CONTRACT-OFF-NEXT: fmul.d $fa0, $fa0, $fa1 +; LA32-CONTRACT-OFF-NEXT: fsub.d $fa0, $fa2, $fa0 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: fnmsub_d_nsz: +; LA64-CONTRACT-FAST: # %bb.0: +; LA64-CONTRACT-FAST-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: fnmsub_d_nsz: +; LA64-CONTRACT-ON: # %bb.0: +; LA64-CONTRACT-ON-NEXT: fmul.d $fa0, $fa0, $fa1 +; LA64-CONTRACT-ON-NEXT: fsub.d $fa0, $fa2, $fa0 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: fnmsub_d_nsz: +; 
LA64-CONTRACT-OFF: # %bb.0: +; LA64-CONTRACT-OFF-NEXT: fmul.d $fa0, $fa0, $fa1 +; LA64-CONTRACT-OFF-NEXT: fsub.d $fa0, $fa2, $fa0 +; LA64-CONTRACT-OFF-NEXT: ret + %nega = fneg nsz double %a + %mul = fmul nsz double %nega, %b + %add = fadd nsz double %mul, %c + ret double %add +} + +;; Check that fnmsub.d is not emitted. +define double @not_fnmsub_d(double %a, double %b, double %c) nounwind { +; LA32-CONTRACT-FAST-LABEL: not_fnmsub_d: +; LA32-CONTRACT-FAST: # %bb.0: +; LA32-CONTRACT-FAST-NEXT: fneg.d $fa0, $fa0 +; LA32-CONTRACT-FAST-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: not_fnmsub_d: +; LA32-CONTRACT-ON: # %bb.0: +; LA32-CONTRACT-ON-NEXT: fmul.d $fa0, $fa0, $fa1 +; LA32-CONTRACT-ON-NEXT: fsub.d $fa0, $fa2, $fa0 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: not_fnmsub_d: +; LA32-CONTRACT-OFF: # %bb.0: +; LA32-CONTRACT-OFF-NEXT: fmul.d $fa0, $fa0, $fa1 +; LA32-CONTRACT-OFF-NEXT: fsub.d $fa0, $fa2, $fa0 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: not_fnmsub_d: +; LA64-CONTRACT-FAST: # %bb.0: +; LA64-CONTRACT-FAST-NEXT: fneg.d $fa0, $fa0 +; LA64-CONTRACT-FAST-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: not_fnmsub_d: +; LA64-CONTRACT-ON: # %bb.0: +; LA64-CONTRACT-ON-NEXT: fmul.d $fa0, $fa0, $fa1 +; LA64-CONTRACT-ON-NEXT: fsub.d $fa0, $fa2, $fa0 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: not_fnmsub_d: +; LA64-CONTRACT-OFF: # %bb.0: +; LA64-CONTRACT-OFF-NEXT: fmul.d $fa0, $fa0, $fa1 ; LA64-CONTRACT-OFF-NEXT: fsub.d $fa0, $fa2, $fa0 ; LA64-CONTRACT-OFF-NEXT: ret %nega = fneg double %a @@ -483,6 +571,86 @@ define double @contract_fnmsub_d(double %a, double %b, double %c) nounwind { ; LA64-CONTRACT-OFF-LABEL: contract_fnmsub_d: ; LA64-CONTRACT-OFF: # %bb.0: ; LA64-CONTRACT-OFF-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-OFF-NEXT: ret + %negc = fneg contract double %c + %mul = fmul contract double %a, %b + %add = fadd contract double %mul, %negc + %neg = fneg contract double %add + ret double %neg +} + +define double @contract_fnmsub_d_nsz(double %a, double %b, double %c) nounwind { +; LA32-CONTRACT-FAST-LABEL: contract_fnmsub_d_nsz: +; LA32-CONTRACT-FAST: # %bb.0: +; LA32-CONTRACT-FAST-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: contract_fnmsub_d_nsz: +; LA32-CONTRACT-ON: # %bb.0: +; LA32-CONTRACT-ON-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: contract_fnmsub_d_nsz: +; LA32-CONTRACT-OFF: # %bb.0: +; LA32-CONTRACT-OFF-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: contract_fnmsub_d_nsz: +; LA64-CONTRACT-FAST: # %bb.0: +; LA64-CONTRACT-FAST-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: contract_fnmsub_d_nsz: +; LA64-CONTRACT-ON: # %bb.0: +; LA64-CONTRACT-ON-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: contract_fnmsub_d_nsz: +; LA64-CONTRACT-OFF: # %bb.0: +; LA64-CONTRACT-OFF-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-OFF-NEXT: ret + %nega = fneg contract nsz double %a + %mul = fmul contract nsz double %nega, %b + %add = fadd contract nsz double %mul, %c + ret double %add +} + +;; Check that fnmsub.d is not emitted. 
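+;; Folding (-a) * b + c into fnmsub.d (which computes -(a*b - c)) is only
+;; sound under nsz: when a*b == c exactly, (-a)*b + c rounds to +0.0 while
+;; fnmsub.d produces -0.0.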
+define double @not_contract_fnmsub_d(double %a, double %b, double %c) nounwind { +; LA32-CONTRACT-FAST-LABEL: not_contract_fnmsub_d: +; LA32-CONTRACT-FAST: # %bb.0: +; LA32-CONTRACT-FAST-NEXT: fneg.d $fa0, $fa0 +; LA32-CONTRACT-FAST-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: not_contract_fnmsub_d: +; LA32-CONTRACT-ON: # %bb.0: +; LA32-CONTRACT-ON-NEXT: fneg.d $fa0, $fa0 +; LA32-CONTRACT-ON-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: not_contract_fnmsub_d: +; LA32-CONTRACT-OFF: # %bb.0: +; LA32-CONTRACT-OFF-NEXT: fneg.d $fa0, $fa0 +; LA32-CONTRACT-OFF-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: not_contract_fnmsub_d: +; LA64-CONTRACT-FAST: # %bb.0: +; LA64-CONTRACT-FAST-NEXT: fneg.d $fa0, $fa0 +; LA64-CONTRACT-FAST-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: not_contract_fnmsub_d: +; LA64-CONTRACT-ON: # %bb.0: +; LA64-CONTRACT-ON-NEXT: fneg.d $fa0, $fa0 +; LA64-CONTRACT-ON-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: not_contract_fnmsub_d: +; LA64-CONTRACT-OFF: # %bb.0: +; LA64-CONTRACT-OFF-NEXT: fneg.d $fa0, $fa0 +; LA64-CONTRACT-OFF-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 ; LA64-CONTRACT-OFF-NEXT: ret %nega = fneg contract double %a %mul = fmul contract double %nega, %b @@ -592,8 +760,8 @@ define double @fnmadd_d_intrinsics(double %a, double %b, double %c) nounwind { ; LA64-CONTRACT-OFF-NEXT: fnmadd.d $fa0, $fa0, $fa1, $fa2 ; LA64-CONTRACT-OFF-NEXT: ret %fma = call double @llvm.fma.f64(double %a, double %b, double %c) - %neg = fneg double %fma - ret double %neg + %negfma = fneg double %fma + ret double %negfma } define double @fnmadd_d_nsz_intrinsics(double %a, double %b, double %c) nounwind { @@ -704,44 +872,87 @@ define double @fnmsub_d_intrinsics(double %a, double %b, double %c) nounwind { ; LA64-CONTRACT-OFF-LABEL: fnmsub_d_intrinsics: ; LA64-CONTRACT-OFF: # %bb.0: ; LA64-CONTRACT-OFF-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-OFF-NEXT: ret + %negc = fneg double %c + %fma = call double @llvm.fma.f64(double %a, double %b, double %negc) + %negfma = fneg double %fma + ret double %negfma +} + +define double @fnmsub_d_nsz_intrinsics(double %a, double %b, double %c) nounwind { +; LA32-CONTRACT-FAST-LABEL: fnmsub_d_nsz_intrinsics: +; LA32-CONTRACT-FAST: # %bb.0: +; LA32-CONTRACT-FAST-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: fnmsub_d_nsz_intrinsics: +; LA32-CONTRACT-ON: # %bb.0: +; LA32-CONTRACT-ON-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: fnmsub_d_nsz_intrinsics: +; LA32-CONTRACT-OFF: # %bb.0: +; LA32-CONTRACT-OFF-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: fnmsub_d_nsz_intrinsics: +; LA64-CONTRACT-FAST: # %bb.0: +; LA64-CONTRACT-FAST-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: fnmsub_d_nsz_intrinsics: +; LA64-CONTRACT-ON: # %bb.0: +; LA64-CONTRACT-ON-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: fnmsub_d_nsz_intrinsics: +; LA64-CONTRACT-OFF: # %bb.0: +; LA64-CONTRACT-OFF-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 ; LA64-CONTRACT-OFF-NEXT: ret %nega = fneg double %a - %fma = call double @llvm.fma.f64(double %nega, double %b, double %c) + 
%fma = call nsz double @llvm.fma.f64(double %nega, double %b, double %c) ret double %fma } -define double @fnmsub_d_swap_intrinsics(double %a, double %b, double %c) nounwind { -; LA32-CONTRACT-FAST-LABEL: fnmsub_d_swap_intrinsics: +;; Check that fnmsub.d is not emitted. +define double @not_fnmsub_d_intrinsics(double %a, double %b, double %c) nounwind { +; LA32-CONTRACT-FAST-LABEL: not_fnmsub_d_intrinsics: ; LA32-CONTRACT-FAST: # %bb.0: -; LA32-CONTRACT-FAST-NEXT: fnmsub.d $fa0, $fa1, $fa0, $fa2 +; LA32-CONTRACT-FAST-NEXT: fneg.d $fa0, $fa0 +; LA32-CONTRACT-FAST-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 ; LA32-CONTRACT-FAST-NEXT: ret ; -; LA32-CONTRACT-ON-LABEL: fnmsub_d_swap_intrinsics: +; LA32-CONTRACT-ON-LABEL: not_fnmsub_d_intrinsics: ; LA32-CONTRACT-ON: # %bb.0: -; LA32-CONTRACT-ON-NEXT: fnmsub.d $fa0, $fa1, $fa0, $fa2 +; LA32-CONTRACT-ON-NEXT: fneg.d $fa0, $fa0 +; LA32-CONTRACT-ON-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 ; LA32-CONTRACT-ON-NEXT: ret ; -; LA32-CONTRACT-OFF-LABEL: fnmsub_d_swap_intrinsics: +; LA32-CONTRACT-OFF-LABEL: not_fnmsub_d_intrinsics: ; LA32-CONTRACT-OFF: # %bb.0: -; LA32-CONTRACT-OFF-NEXT: fnmsub.d $fa0, $fa1, $fa0, $fa2 +; LA32-CONTRACT-OFF-NEXT: fneg.d $fa0, $fa0 +; LA32-CONTRACT-OFF-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 ; LA32-CONTRACT-OFF-NEXT: ret ; -; LA64-CONTRACT-FAST-LABEL: fnmsub_d_swap_intrinsics: +; LA64-CONTRACT-FAST-LABEL: not_fnmsub_d_intrinsics: ; LA64-CONTRACT-FAST: # %bb.0: -; LA64-CONTRACT-FAST-NEXT: fnmsub.d $fa0, $fa1, $fa0, $fa2 +; LA64-CONTRACT-FAST-NEXT: fneg.d $fa0, $fa0 +; LA64-CONTRACT-FAST-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 ; LA64-CONTRACT-FAST-NEXT: ret ; -; LA64-CONTRACT-ON-LABEL: fnmsub_d_swap_intrinsics: +; LA64-CONTRACT-ON-LABEL: not_fnmsub_d_intrinsics: ; LA64-CONTRACT-ON: # %bb.0: -; LA64-CONTRACT-ON-NEXT: fnmsub.d $fa0, $fa1, $fa0, $fa2 +; LA64-CONTRACT-ON-NEXT: fneg.d $fa0, $fa0 +; LA64-CONTRACT-ON-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 ; LA64-CONTRACT-ON-NEXT: ret ; -; LA64-CONTRACT-OFF-LABEL: fnmsub_d_swap_intrinsics: +; LA64-CONTRACT-OFF-LABEL: not_fnmsub_d_intrinsics: ; LA64-CONTRACT-OFF: # %bb.0: -; LA64-CONTRACT-OFF-NEXT: fnmsub.d $fa0, $fa1, $fa0, $fa2 +; LA64-CONTRACT-OFF-NEXT: fneg.d $fa0, $fa0 +; LA64-CONTRACT-OFF-NEXT: fmadd.d $fa0, $fa0, $fa1, $fa2 ; LA64-CONTRACT-OFF-NEXT: ret - %negb = fneg double %b - %fma = call double @llvm.fma.f64(double %a, double %negb, double %c) + %nega = fneg double %a + %fma = call double @llvm.fma.f64(double %nega, double %b, double %c) ret double %fma } @@ -882,6 +1093,8 @@ define double @fnmsub_d_contract(double %a, double %b, double %c) nounwind { ; LA64-CONTRACT-OFF-NEXT: fnmsub.d $fa0, $fa0, $fa1, $fa2 ; LA64-CONTRACT-OFF-NEXT: ret %mul = fmul contract double %a, %b - %sub = fsub contract double %c, %mul - ret double %sub + %negc = fneg contract double %c + %add = fadd contract double %negc, %mul + %negadd = fneg contract double %add + ret double %negadd } diff --git a/llvm/test/CodeGen/LoongArch/expand-call.ll b/llvm/test/CodeGen/LoongArch/expand-call.ll index 86bf4292665b72c88b4ba5ab3cf056b9c327edda..e0d179f92de6824b28b328127b9303c948d17e3c 100644 --- a/llvm/test/CodeGen/LoongArch/expand-call.ll +++ b/llvm/test/CodeGen/LoongArch/expand-call.ll @@ -1,6 +1,6 @@ ; RUN: llc --mtriple=loongarch64 --stop-before loongarch-prera-expand-pseudo \ ; RUN: --verify-machineinstrs < %s | FileCheck %s --check-prefix=NOEXPAND -; RUN: llc --mtriple=loongarch64 --stop-after loongarch-prera-expand-pseudo \ +; RUN: llc --mtriple=loongarch64 --stop-before machine-opt-remark-emitter \ ; RUN: 
--verify-machineinstrs < %s | FileCheck %s --check-prefix=EXPAND declare void @callee() diff --git a/llvm/test/CodeGen/LoongArch/float-fma.ll b/llvm/test/CodeGen/LoongArch/float-fma.ll index 54dc56784006f1270a1d3003e4740fa27cf63b6f..c236255d971a208bba1db9d6c9733d889eaa4027 100644 --- a/llvm/test/CodeGen/LoongArch/float-fma.ll +++ b/llvm/test/CodeGen/LoongArch/float-fma.ll @@ -236,13 +236,15 @@ define float @fnmsub_s(float %a, float %b, float %c) nounwind { ; LA32-CONTRACT-ON-LABEL: fnmsub_s: ; LA32-CONTRACT-ON: # %bb.0: ; LA32-CONTRACT-ON-NEXT: fmul.s $fa0, $fa0, $fa1 -; LA32-CONTRACT-ON-NEXT: fsub.s $fa0, $fa2, $fa0 +; LA32-CONTRACT-ON-NEXT: fsub.s $fa0, $fa0, $fa2 +; LA32-CONTRACT-ON-NEXT: fneg.s $fa0, $fa0 ; LA32-CONTRACT-ON-NEXT: ret ; ; LA32-CONTRACT-OFF-LABEL: fnmsub_s: ; LA32-CONTRACT-OFF: # %bb.0: ; LA32-CONTRACT-OFF-NEXT: fmul.s $fa0, $fa0, $fa1 -; LA32-CONTRACT-OFF-NEXT: fsub.s $fa0, $fa2, $fa0 +; LA32-CONTRACT-OFF-NEXT: fsub.s $fa0, $fa0, $fa2 +; LA32-CONTRACT-OFF-NEXT: fneg.s $fa0, $fa0 ; LA32-CONTRACT-OFF-NEXT: ret ; ; LA64-CONTRACT-FAST-LABEL: fnmsub_s: @@ -253,12 +255,98 @@ define float @fnmsub_s(float %a, float %b, float %c) nounwind { ; LA64-CONTRACT-ON-LABEL: fnmsub_s: ; LA64-CONTRACT-ON: # %bb.0: ; LA64-CONTRACT-ON-NEXT: fmul.s $fa0, $fa0, $fa1 -; LA64-CONTRACT-ON-NEXT: fsub.s $fa0, $fa2, $fa0 +; LA64-CONTRACT-ON-NEXT: fsub.s $fa0, $fa0, $fa2 +; LA64-CONTRACT-ON-NEXT: fneg.s $fa0, $fa0 ; LA64-CONTRACT-ON-NEXT: ret ; ; LA64-CONTRACT-OFF-LABEL: fnmsub_s: ; LA64-CONTRACT-OFF: # %bb.0: ; LA64-CONTRACT-OFF-NEXT: fmul.s $fa0, $fa0, $fa1 +; LA64-CONTRACT-OFF-NEXT: fsub.s $fa0, $fa0, $fa2 +; LA64-CONTRACT-OFF-NEXT: fneg.s $fa0, $fa0 +; LA64-CONTRACT-OFF-NEXT: ret + %negc = fneg float %c + %mul = fmul float %a, %b + %add = fadd float %mul, %negc + %neg = fneg float %add + ret float %neg +} + +define float @fnmsub_s_nsz(float %a, float %b, float %c) nounwind { +; LA32-CONTRACT-FAST-LABEL: fnmsub_s_nsz: +; LA32-CONTRACT-FAST: # %bb.0: +; LA32-CONTRACT-FAST-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: fnmsub_s_nsz: +; LA32-CONTRACT-ON: # %bb.0: +; LA32-CONTRACT-ON-NEXT: fmul.s $fa0, $fa0, $fa1 +; LA32-CONTRACT-ON-NEXT: fsub.s $fa0, $fa2, $fa0 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: fnmsub_s_nsz: +; LA32-CONTRACT-OFF: # %bb.0: +; LA32-CONTRACT-OFF-NEXT: fmul.s $fa0, $fa0, $fa1 +; LA32-CONTRACT-OFF-NEXT: fsub.s $fa0, $fa2, $fa0 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: fnmsub_s_nsz: +; LA64-CONTRACT-FAST: # %bb.0: +; LA64-CONTRACT-FAST-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: fnmsub_s_nsz: +; LA64-CONTRACT-ON: # %bb.0: +; LA64-CONTRACT-ON-NEXT: fmul.s $fa0, $fa0, $fa1 +; LA64-CONTRACT-ON-NEXT: fsub.s $fa0, $fa2, $fa0 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: fnmsub_s_nsz: +; LA64-CONTRACT-OFF: # %bb.0: +; LA64-CONTRACT-OFF-NEXT: fmul.s $fa0, $fa0, $fa1 +; LA64-CONTRACT-OFF-NEXT: fsub.s $fa0, $fa2, $fa0 +; LA64-CONTRACT-OFF-NEXT: ret + %nega = fneg nsz float %a + %mul = fmul nsz float %nega, %b + %add = fadd nsz float %mul, %c + ret float %add +} + +;; Check that fnmsub.s is not emitted. 
+define float @not_fnmsub_s(float %a, float %b, float %c) nounwind { +; LA32-CONTRACT-FAST-LABEL: not_fnmsub_s: +; LA32-CONTRACT-FAST: # %bb.0: +; LA32-CONTRACT-FAST-NEXT: fneg.s $fa0, $fa0 +; LA32-CONTRACT-FAST-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: not_fnmsub_s: +; LA32-CONTRACT-ON: # %bb.0: +; LA32-CONTRACT-ON-NEXT: fmul.s $fa0, $fa0, $fa1 +; LA32-CONTRACT-ON-NEXT: fsub.s $fa0, $fa2, $fa0 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: not_fnmsub_s: +; LA32-CONTRACT-OFF: # %bb.0: +; LA32-CONTRACT-OFF-NEXT: fmul.s $fa0, $fa0, $fa1 +; LA32-CONTRACT-OFF-NEXT: fsub.s $fa0, $fa2, $fa0 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: not_fnmsub_s: +; LA64-CONTRACT-FAST: # %bb.0: +; LA64-CONTRACT-FAST-NEXT: fneg.s $fa0, $fa0 +; LA64-CONTRACT-FAST-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: not_fnmsub_s: +; LA64-CONTRACT-ON: # %bb.0: +; LA64-CONTRACT-ON-NEXT: fmul.s $fa0, $fa0, $fa1 +; LA64-CONTRACT-ON-NEXT: fsub.s $fa0, $fa2, $fa0 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: not_fnmsub_s: +; LA64-CONTRACT-OFF: # %bb.0: +; LA64-CONTRACT-OFF-NEXT: fmul.s $fa0, $fa0, $fa1 ; LA64-CONTRACT-OFF-NEXT: fsub.s $fa0, $fa2, $fa0 ; LA64-CONTRACT-OFF-NEXT: ret %nega = fneg float %a @@ -483,6 +571,86 @@ define float @contract_fnmsub_s(float %a, float %b, float %c) nounwind { ; LA64-CONTRACT-OFF-LABEL: contract_fnmsub_s: ; LA64-CONTRACT-OFF: # %bb.0: ; LA64-CONTRACT-OFF-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-OFF-NEXT: ret + %negc = fneg contract float %c + %mul = fmul contract float %a, %b + %add = fadd contract float %mul, %negc + %neg = fneg contract float %add + ret float %neg +} + +define float @contract_fnmsub_s_nsz(float %a, float %b, float %c) nounwind { +; LA32-CONTRACT-FAST-LABEL: contract_fnmsub_s_nsz: +; LA32-CONTRACT-FAST: # %bb.0: +; LA32-CONTRACT-FAST-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: contract_fnmsub_s_nsz: +; LA32-CONTRACT-ON: # %bb.0: +; LA32-CONTRACT-ON-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: contract_fnmsub_s_nsz: +; LA32-CONTRACT-OFF: # %bb.0: +; LA32-CONTRACT-OFF-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: contract_fnmsub_s_nsz: +; LA64-CONTRACT-FAST: # %bb.0: +; LA64-CONTRACT-FAST-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: contract_fnmsub_s_nsz: +; LA64-CONTRACT-ON: # %bb.0: +; LA64-CONTRACT-ON-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: contract_fnmsub_s_nsz: +; LA64-CONTRACT-OFF: # %bb.0: +; LA64-CONTRACT-OFF-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-OFF-NEXT: ret + %nega = fneg contract nsz float %a + %mul = fmul contract nsz float %nega, %b + %add = fadd contract nsz float %mul, %c + ret float %add +} + +;; Check that fnmsub.s is not emitted. 
+define float @not_contract_fnmsub_s(float %a, float %b, float %c) nounwind { +; LA32-CONTRACT-FAST-LABEL: not_contract_fnmsub_s: +; LA32-CONTRACT-FAST: # %bb.0: +; LA32-CONTRACT-FAST-NEXT: fneg.s $fa0, $fa0 +; LA32-CONTRACT-FAST-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: not_contract_fnmsub_s: +; LA32-CONTRACT-ON: # %bb.0: +; LA32-CONTRACT-ON-NEXT: fneg.s $fa0, $fa0 +; LA32-CONTRACT-ON-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: not_contract_fnmsub_s: +; LA32-CONTRACT-OFF: # %bb.0: +; LA32-CONTRACT-OFF-NEXT: fneg.s $fa0, $fa0 +; LA32-CONTRACT-OFF-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: not_contract_fnmsub_s: +; LA64-CONTRACT-FAST: # %bb.0: +; LA64-CONTRACT-FAST-NEXT: fneg.s $fa0, $fa0 +; LA64-CONTRACT-FAST-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: not_contract_fnmsub_s: +; LA64-CONTRACT-ON: # %bb.0: +; LA64-CONTRACT-ON-NEXT: fneg.s $fa0, $fa0 +; LA64-CONTRACT-ON-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: not_contract_fnmsub_s: +; LA64-CONTRACT-OFF: # %bb.0: +; LA64-CONTRACT-OFF-NEXT: fneg.s $fa0, $fa0 +; LA64-CONTRACT-OFF-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 ; LA64-CONTRACT-OFF-NEXT: ret %nega = fneg contract float %a %mul = fmul contract float %nega, %b @@ -592,8 +760,8 @@ define float @fnmadd_s_intrinsics(float %a, float %b, float %c) nounwind { ; LA64-CONTRACT-OFF-NEXT: fnmadd.s $fa0, $fa0, $fa1, $fa2 ; LA64-CONTRACT-OFF-NEXT: ret %fma = call float @llvm.fma.f64(float %a, float %b, float %c) - %neg = fneg float %fma - ret float %neg + %negfma = fneg float %fma + ret float %negfma } define float @fnmadd_s_nsz_intrinsics(float %a, float %b, float %c) nounwind { @@ -704,44 +872,87 @@ define float @fnmsub_s_intrinsics(float %a, float %b, float %c) nounwind { ; LA64-CONTRACT-OFF-LABEL: fnmsub_s_intrinsics: ; LA64-CONTRACT-OFF: # %bb.0: ; LA64-CONTRACT-OFF-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-OFF-NEXT: ret + %negc = fneg float %c + %fma = call float @llvm.fma.f64(float %a, float %b, float %negc) + %negfma = fneg float %fma + ret float %negfma +} + +define float @fnmsub_s_nsz_intrinsics(float %a, float %b, float %c) nounwind { +; LA32-CONTRACT-FAST-LABEL: fnmsub_s_nsz_intrinsics: +; LA32-CONTRACT-FAST: # %bb.0: +; LA32-CONTRACT-FAST-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-FAST-NEXT: ret +; +; LA32-CONTRACT-ON-LABEL: fnmsub_s_nsz_intrinsics: +; LA32-CONTRACT-ON: # %bb.0: +; LA32-CONTRACT-ON-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-ON-NEXT: ret +; +; LA32-CONTRACT-OFF-LABEL: fnmsub_s_nsz_intrinsics: +; LA32-CONTRACT-OFF: # %bb.0: +; LA32-CONTRACT-OFF-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 +; LA32-CONTRACT-OFF-NEXT: ret +; +; LA64-CONTRACT-FAST-LABEL: fnmsub_s_nsz_intrinsics: +; LA64-CONTRACT-FAST: # %bb.0: +; LA64-CONTRACT-FAST-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-FAST-NEXT: ret +; +; LA64-CONTRACT-ON-LABEL: fnmsub_s_nsz_intrinsics: +; LA64-CONTRACT-ON: # %bb.0: +; LA64-CONTRACT-ON-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 +; LA64-CONTRACT-ON-NEXT: ret +; +; LA64-CONTRACT-OFF-LABEL: fnmsub_s_nsz_intrinsics: +; LA64-CONTRACT-OFF: # %bb.0: +; LA64-CONTRACT-OFF-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 ; LA64-CONTRACT-OFF-NEXT: ret %nega = fneg float %a - %fma = call float @llvm.fma.f64(float %nega, float %b, float %c) + %fma = call nsz float @llvm.fma.f64(float 
%nega, float %b, float %c) ret float %fma } -define float @fnmsub_s_swap_intrinsics(float %a, float %b, float %c) nounwind { -; LA32-CONTRACT-FAST-LABEL: fnmsub_s_swap_intrinsics: +;; Check that fnmsub.s is not emitted. +define float @not_fnmsub_s_intrinsics(float %a, float %b, float %c) nounwind { +; LA32-CONTRACT-FAST-LABEL: not_fnmsub_s_intrinsics: ; LA32-CONTRACT-FAST: # %bb.0: -; LA32-CONTRACT-FAST-NEXT: fnmsub.s $fa0, $fa1, $fa0, $fa2 +; LA32-CONTRACT-FAST-NEXT: fneg.s $fa0, $fa0 +; LA32-CONTRACT-FAST-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 ; LA32-CONTRACT-FAST-NEXT: ret ; -; LA32-CONTRACT-ON-LABEL: fnmsub_s_swap_intrinsics: +; LA32-CONTRACT-ON-LABEL: not_fnmsub_s_intrinsics: ; LA32-CONTRACT-ON: # %bb.0: -; LA32-CONTRACT-ON-NEXT: fnmsub.s $fa0, $fa1, $fa0, $fa2 +; LA32-CONTRACT-ON-NEXT: fneg.s $fa0, $fa0 +; LA32-CONTRACT-ON-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 ; LA32-CONTRACT-ON-NEXT: ret ; -; LA32-CONTRACT-OFF-LABEL: fnmsub_s_swap_intrinsics: +; LA32-CONTRACT-OFF-LABEL: not_fnmsub_s_intrinsics: ; LA32-CONTRACT-OFF: # %bb.0: -; LA32-CONTRACT-OFF-NEXT: fnmsub.s $fa0, $fa1, $fa0, $fa2 +; LA32-CONTRACT-OFF-NEXT: fneg.s $fa0, $fa0 +; LA32-CONTRACT-OFF-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 ; LA32-CONTRACT-OFF-NEXT: ret ; -; LA64-CONTRACT-FAST-LABEL: fnmsub_s_swap_intrinsics: +; LA64-CONTRACT-FAST-LABEL: not_fnmsub_s_intrinsics: ; LA64-CONTRACT-FAST: # %bb.0: -; LA64-CONTRACT-FAST-NEXT: fnmsub.s $fa0, $fa1, $fa0, $fa2 +; LA64-CONTRACT-FAST-NEXT: fneg.s $fa0, $fa0 +; LA64-CONTRACT-FAST-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 ; LA64-CONTRACT-FAST-NEXT: ret ; -; LA64-CONTRACT-ON-LABEL: fnmsub_s_swap_intrinsics: +; LA64-CONTRACT-ON-LABEL: not_fnmsub_s_intrinsics: ; LA64-CONTRACT-ON: # %bb.0: -; LA64-CONTRACT-ON-NEXT: fnmsub.s $fa0, $fa1, $fa0, $fa2 +; LA64-CONTRACT-ON-NEXT: fneg.s $fa0, $fa0 +; LA64-CONTRACT-ON-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 ; LA64-CONTRACT-ON-NEXT: ret ; -; LA64-CONTRACT-OFF-LABEL: fnmsub_s_swap_intrinsics: +; LA64-CONTRACT-OFF-LABEL: not_fnmsub_s_intrinsics: ; LA64-CONTRACT-OFF: # %bb.0: -; LA64-CONTRACT-OFF-NEXT: fnmsub.s $fa0, $fa1, $fa0, $fa2 +; LA64-CONTRACT-OFF-NEXT: fneg.s $fa0, $fa0 +; LA64-CONTRACT-OFF-NEXT: fmadd.s $fa0, $fa0, $fa1, $fa2 ; LA64-CONTRACT-OFF-NEXT: ret - %negb = fneg float %b - %fma = call float @llvm.fma.f64(float %a, float %negb, float %c) + %nega = fneg float %a + %fma = call float @llvm.fma.f64(float %nega, float %b, float %c) ret float %fma } @@ -882,6 +1093,8 @@ define float @fnmsub_s_contract(float %a, float %b, float %c) nounwind { ; LA64-CONTRACT-OFF-NEXT: fnmsub.s $fa0, $fa0, $fa1, $fa2 ; LA64-CONTRACT-OFF-NEXT: ret %mul = fmul contract float %a, %b - %sub = fsub contract float %c, %mul - ret float %sub + %negc = fneg contract float %c + %add = fadd contract float %negc, %mul + %negadd = fneg contract float %add + ret float %negadd } diff --git a/llvm/test/CodeGen/LoongArch/ghc-call-webkit.ll b/llvm/test/CodeGen/LoongArch/ghc-call-webkit.ll new file mode 100644 index 0000000000000000000000000000000000000000..589892f89dba60486b27b3c49b20a8c7f930edcd --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/ghc-call-webkit.ll @@ -0,0 +1,22 @@ +;; OHOS_LOCAL begin +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; REQUIRES: ark_gc_support +; RUN: llc --mtriple=loongarch64 --mattr=+d < %s | FileCheck %s + +;; Test that prolog/epilog code is generated when a ghccc function calls a webkit_jscc function, because registers must be pushed to the stack.
+ +define ghccc void @caller(i64 %a, i64 %b) nounwind { +; CHECK-LABEL: caller: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.d $sp, $sp, -16 +; CHECK-NEXT: move $a0, $s0 +; CHECK-NEXT: st.d $fp, $sp, 0 +; CHECK-NEXT: bl %plt(callee) +; CHECK-NEXT: addi.d $sp, $sp, 16 +; CHECK-NEXT: ret + call webkit_jscc void @callee(i64 %a, i64 %b) + ret void +} + +declare void @callee(i64, i64) +;; OHOS_LOCAL end diff --git a/llvm/test/CodeGen/LoongArch/global-address.ll b/llvm/test/CodeGen/LoongArch/global-address.ll index 258c4e8691567fed4fc0bdbb5ec32ea51f6285ae..d32a17f488b1422df12553801fb0b170836b6738 100644 --- a/llvm/test/CodeGen/LoongArch/global-address.ll +++ b/llvm/test/CodeGen/LoongArch/global-address.ll @@ -3,6 +3,8 @@ ; RUN: llc --mtriple=loongarch32 --relocation-model=pic < %s | FileCheck %s --check-prefix=LA32PIC ; RUN: llc --mtriple=loongarch64 --relocation-model=static < %s | FileCheck %s --check-prefix=LA64NOPIC ; RUN: llc --mtriple=loongarch64 --relocation-model=pic < %s | FileCheck %s --check-prefix=LA64PIC +; RUN: llc --mtriple=loongarch64 --code-model=large --relocation-model=static < %s | FileCheck %s --check-prefix=LA64LARGENOPIC +; RUN: llc --mtriple=loongarch64 --code-model=large --relocation-model=pic < %s | FileCheck %s --check-prefix=LA64LARGEPIC @g = dso_local global i32 zeroinitializer, align 4 @G = global i32 zeroinitializer, align 4 @@ -47,6 +49,38 @@ define void @foo() nounwind { ; LA64PIC-NEXT: addi.d $a0, $a0, %pc_lo12(.Lg$local) ; LA64PIC-NEXT: ld.w $a0, $a0, 0 ; LA64PIC-NEXT: ret +; +; LA64LARGENOPIC-LABEL: foo: +; LA64LARGENOPIC: # %bb.0: +; LA64LARGENOPIC-NEXT: pcalau12i $a0, %got_pc_hi20(G) +; LA64LARGENOPIC-NEXT: addi.d $t8, $zero, %got_pc_lo12(G) +; LA64LARGENOPIC-NEXT: lu32i.d $t8, %got64_pc_lo20(G) +; LA64LARGENOPIC-NEXT: lu52i.d $t8, $t8, %got64_pc_hi12(G) +; LA64LARGENOPIC-NEXT: ldx.d $a0, $t8, $a0 +; LA64LARGENOPIC-NEXT: ld.w $a0, $a0, 0 +; LA64LARGENOPIC-NEXT: pcalau12i $a0, %pc_hi20(g) +; LA64LARGENOPIC-NEXT: addi.d $t8, $zero, %pc_lo12(g) +; LA64LARGENOPIC-NEXT: lu32i.d $t8, %pc64_lo20(g) +; LA64LARGENOPIC-NEXT: lu52i.d $t8, $t8, %pc64_hi12(g) +; LA64LARGENOPIC-NEXT: add.d $a0, $t8, $a0 +; LA64LARGENOPIC-NEXT: ld.w $a0, $a0, 0 +; LA64LARGENOPIC-NEXT: ret +; +; LA64LARGEPIC-LABEL: foo: +; LA64LARGEPIC: # %bb.0: +; LA64LARGEPIC-NEXT: pcalau12i $a0, %got_pc_hi20(G) +; LA64LARGEPIC-NEXT: addi.d $t8, $zero, %got_pc_lo12(G) +; LA64LARGEPIC-NEXT: lu32i.d $t8, %got64_pc_lo20(G) +; LA64LARGEPIC-NEXT: lu52i.d $t8, $t8, %got64_pc_hi12(G) +; LA64LARGEPIC-NEXT: ldx.d $a0, $t8, $a0 +; LA64LARGEPIC-NEXT: ld.w $a0, $a0, 0 +; LA64LARGEPIC-NEXT: pcalau12i $a0, %pc_hi20(.Lg$local) +; LA64LARGEPIC-NEXT: addi.d $t8, $zero, %pc_lo12(.Lg$local) +; LA64LARGEPIC-NEXT: lu32i.d $t8, %pc64_lo20(.Lg$local) +; LA64LARGEPIC-NEXT: lu52i.d $t8, $t8, %pc64_hi12(.Lg$local) +; LA64LARGEPIC-NEXT: add.d $a0, $t8, $a0 +; LA64LARGEPIC-NEXT: ld.w $a0, $a0, 0 +; LA64LARGEPIC-NEXT: ret %V = load volatile i32, ptr @G %v = load volatile i32, ptr @g ret void diff --git a/llvm/test/CodeGen/LoongArch/global-variable-code-model.ll b/llvm/test/CodeGen/LoongArch/global-variable-code-model.ll new file mode 100644 index 0000000000000000000000000000000000000000..aa4780834ac3e825d4d1825c856f9bf6c0cf5fd0 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/global-variable-code-model.ll @@ -0,0 +1,44 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s + +@a= external dso_local global i32, code_model "small", align 4 + +define dso_local 
signext i32 @local_small() #0 { +; CHECK-LABEL: local_small: +; CHECK: # %bb.0: +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(a) +; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(a) +; CHECK-NEXT: ld.w $a0, $a0, 0 +; CHECK-NEXT: ret + %1 = load i32, ptr @a, align 4 + ret i32 %1 +} + +@b= external dso_local global i32, code_model "large", align 4 + +define dso_local signext i32 @local_large() #0 { +; CHECK-LABEL: local_large: +; CHECK: # %bb.0: +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(b) +; CHECK-NEXT: addi.d $t8, $zero, %pc_lo12(b) +; CHECK-NEXT: lu32i.d $t8, %pc64_lo20(b) +; CHECK-NEXT: lu52i.d $t8, $t8, %pc64_hi12(b) +; CHECK-NEXT: add.d $a0, $t8, $a0 +; CHECK-NEXT: ld.w $a0, $a0, 0 +; CHECK-NEXT: ret + %1 = load i32, ptr @b, align 4 + ret i32 %1 +} + +@c= external global i32, code_model "large", align 4 + +define dso_local signext i32 @non_local_large() #0 { +; CHECK-LABEL: non_local_large: +; CHECK: # %bb.0: +; CHECK-NEXT: pcalau12i $a0, %got_pc_hi20(c) +; CHECK-NEXT: ld.d $a0, $a0, %got_pc_lo12(c) +; CHECK-NEXT: ld.w $a0, $a0, 0 +; CHECK-NEXT: ret + %1 = load i32, ptr @c, align 4 + ret i32 %1 +} diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll index b8c0cb257122a792c43705478a095ec7c406b653..a8644497146de44a5bc8feeb13cf33a9b9839d81 100644 --- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll +++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg.ll @@ -73,6 +73,7 @@ define void @cmpxchg_i16_acquire_acquire(ptr %ptr, i16 %cmp, i16 %val) nounwind define void @cmpxchg_i32_acquire_acquire(ptr %ptr, i32 %cmp, i32 %val) nounwind { ; LA64-LABEL: cmpxchg_i32_acquire_acquire: ; LA64: # %bb.0: +; LA64-NEXT: addi.w $a1, $a1, 0 ; LA64-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 ; LA64-NEXT: ll.w $a3, $a0, 0 ; LA64-NEXT: bne $a3, $a1, .LBB2_3 @@ -186,9 +187,10 @@ define i16 @cmpxchg_i16_acquire_acquire_reti16(ptr %ptr, i16 %cmp, i16 %val) nou define i32 @cmpxchg_i32_acquire_acquire_reti32(ptr %ptr, i32 %cmp, i32 %val) nounwind { ; LA64-LABEL: cmpxchg_i32_acquire_acquire_reti32: ; LA64: # %bb.0: +; LA64-NEXT: addi.w $a3, $a1, 0 ; LA64-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 -; LA64-NEXT: ll.w $a3, $a0, 0 -; LA64-NEXT: bne $a3, $a1, .LBB6_3 +; LA64-NEXT: ll.w $a1, $a0, 0 +; LA64-NEXT: bne $a1, $a3, .LBB6_3 ; LA64-NEXT: # %bb.2: # in Loop: Header=BB6_1 Depth=1 ; LA64-NEXT: dbar 0 ; LA64-NEXT: move $a4, $a2 @@ -198,7 +200,7 @@ define i32 @cmpxchg_i32_acquire_acquire_reti32(ptr %ptr, i32 %cmp, i32 %val) nou ; LA64-NEXT: .LBB6_3: ; LA64-NEXT: dbar 1792 ; LA64-NEXT: .LBB6_4: -; LA64-NEXT: move $a0, $a3 +; LA64-NEXT: move $a0, $a1 ; LA64-NEXT: ret %tmp = cmpxchg ptr %ptr, i32 %cmp, i32 %val acquire acquire %res = extractvalue { i32, i1 } %tmp, 0 @@ -311,6 +313,7 @@ define i1 @cmpxchg_i16_acquire_acquire_reti1(ptr %ptr, i16 %cmp, i16 %val) nounw define i1 @cmpxchg_i32_acquire_acquire_reti1(ptr %ptr, i32 %cmp, i32 %val) nounwind { ; LA64-LABEL: cmpxchg_i32_acquire_acquire_reti1: ; LA64: # %bb.0: +; LA64-NEXT: addi.w $a1, $a1, 0 ; LA64-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1 ; LA64-NEXT: ll.w $a3, $a0, 0 ; LA64-NEXT: bne $a3, $a1, .LBB10_3 @@ -323,8 +326,7 @@ define i1 @cmpxchg_i32_acquire_acquire_reti1(ptr %ptr, i32 %cmp, i32 %val) nounw ; LA64-NEXT: .LBB10_3: ; LA64-NEXT: dbar 1792 ; LA64-NEXT: .LBB10_4: -; LA64-NEXT: addi.w $a0, $a1, 0 -; LA64-NEXT: xor $a0, $a3, $a0 +; LA64-NEXT: xor $a0, $a3, $a1 ; LA64-NEXT: sltui $a0, $a0, 1 ; LA64-NEXT: ret %tmp = cmpxchg ptr %ptr, i32 %cmp, i32 
%val acquire acquire diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll index 3d7aa871b9c9ff25eec8b4fe2cfd0168d505fe25..0fb34d95adc03c92d2d11a1aee8e47ddd45016d2 100644 --- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll +++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-fp.ll @@ -10,13 +10,14 @@ define float @float_fadd_acquire(ptr %p) nounwind { ; LA64F-NEXT: fld.s $fa0, $a0, 0 ; LA64F-NEXT: addi.w $a1, $zero, 1 ; LA64F-NEXT: movgr2fr.w $fa1, $a1 +; LA64F-NEXT: ffint.s.w $fa1, $fa1 ; LA64F-NEXT: .LBB0_1: # %atomicrmw.start ; LA64F-NEXT: # =>This Loop Header: Depth=1 ; LA64F-NEXT: # Child Loop BB0_3 Depth 2 -; LA64F-NEXT: ffint.s.w $fa2, $fa1 -; LA64F-NEXT: fadd.s $fa2, $fa0, $fa2 +; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1 ; LA64F-NEXT: movfr2gr.s $a1, $fa2 ; LA64F-NEXT: movfr2gr.s $a2, $fa0 +; LA64F-NEXT: addi.w $a2, $a2, 0 ; LA64F-NEXT: .LBB0_3: # %atomicrmw.start ; LA64F-NEXT: # Parent Loop BB0_1 Depth=1 ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 @@ -35,8 +36,7 @@ define float @float_fadd_acquire(ptr %p) nounwind { ; LA64F-NEXT: .LBB0_6: # %atomicrmw.start ; LA64F-NEXT: # in Loop: Header=BB0_1 Depth=1 ; LA64F-NEXT: movgr2fr.w $fa0, $a3 -; LA64F-NEXT: addi.w $a1, $a2, 0 -; LA64F-NEXT: bne $a3, $a1, .LBB0_1 +; LA64F-NEXT: bne $a3, $a2, .LBB0_1 ; LA64F-NEXT: # %bb.2: # %atomicrmw.end ; LA64F-NEXT: ret ; @@ -45,13 +45,14 @@ define float @float_fadd_acquire(ptr %p) nounwind { ; LA64D-NEXT: fld.s $fa0, $a0, 0 ; LA64D-NEXT: addi.w $a1, $zero, 1 ; LA64D-NEXT: movgr2fr.w $fa1, $a1 +; LA64D-NEXT: ffint.s.w $fa1, $fa1 ; LA64D-NEXT: .LBB0_1: # %atomicrmw.start ; LA64D-NEXT: # =>This Loop Header: Depth=1 ; LA64D-NEXT: # Child Loop BB0_3 Depth 2 -; LA64D-NEXT: ffint.s.w $fa2, $fa1 -; LA64D-NEXT: fadd.s $fa2, $fa0, $fa2 +; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1 ; LA64D-NEXT: movfr2gr.s $a1, $fa2 ; LA64D-NEXT: movfr2gr.s $a2, $fa0 +; LA64D-NEXT: addi.w $a2, $a2, 0 ; LA64D-NEXT: .LBB0_3: # %atomicrmw.start ; LA64D-NEXT: # Parent Loop BB0_1 Depth=1 ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 @@ -70,8 +71,7 @@ define float @float_fadd_acquire(ptr %p) nounwind { ; LA64D-NEXT: .LBB0_6: # %atomicrmw.start ; LA64D-NEXT: # in Loop: Header=BB0_1 Depth=1 ; LA64D-NEXT: movgr2fr.w $fa0, $a3 -; LA64D-NEXT: addi.w $a1, $a2, 0 -; LA64D-NEXT: bne $a3, $a1, .LBB0_1 +; LA64D-NEXT: bne $a3, $a2, .LBB0_1 ; LA64D-NEXT: # %bb.2: # %atomicrmw.end ; LA64D-NEXT: ret %v = atomicrmw fadd ptr %p, float 1.0 acquire, align 4 @@ -91,6 +91,7 @@ define float @float_fsub_acquire(ptr %p) nounwind { ; LA64F-NEXT: fadd.s $fa2, $fa0, $fa1 ; LA64F-NEXT: movfr2gr.s $a1, $fa2 ; LA64F-NEXT: movfr2gr.s $a2, $fa0 +; LA64F-NEXT: addi.w $a2, $a2, 0 ; LA64F-NEXT: .LBB1_3: # %atomicrmw.start ; LA64F-NEXT: # Parent Loop BB1_1 Depth=1 ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 @@ -109,8 +110,7 @@ define float @float_fsub_acquire(ptr %p) nounwind { ; LA64F-NEXT: .LBB1_6: # %atomicrmw.start ; LA64F-NEXT: # in Loop: Header=BB1_1 Depth=1 ; LA64F-NEXT: movgr2fr.w $fa0, $a3 -; LA64F-NEXT: addi.w $a1, $a2, 0 -; LA64F-NEXT: bne $a3, $a1, .LBB1_1 +; LA64F-NEXT: bne $a3, $a2, .LBB1_1 ; LA64F-NEXT: # %bb.2: # %atomicrmw.end ; LA64F-NEXT: ret ; @@ -126,6 +126,7 @@ define float @float_fsub_acquire(ptr %p) nounwind { ; LA64D-NEXT: fadd.s $fa2, $fa0, $fa1 ; LA64D-NEXT: movfr2gr.s $a1, $fa2 ; LA64D-NEXT: movfr2gr.s $a2, $fa0 +; LA64D-NEXT: addi.w $a2, $a2, 0 ; LA64D-NEXT: .LBB1_3: # %atomicrmw.start ; LA64D-NEXT: # Parent Loop BB1_1 Depth=1 ; 
LA64D-NEXT: # => This Inner Loop Header: Depth=2 @@ -144,8 +145,7 @@ define float @float_fsub_acquire(ptr %p) nounwind { ; LA64D-NEXT: .LBB1_6: # %atomicrmw.start ; LA64D-NEXT: # in Loop: Header=BB1_1 Depth=1 ; LA64D-NEXT: movgr2fr.w $fa0, $a3 -; LA64D-NEXT: addi.w $a1, $a2, 0 -; LA64D-NEXT: bne $a3, $a1, .LBB1_1 +; LA64D-NEXT: bne $a3, $a2, .LBB1_1 ; LA64D-NEXT: # %bb.2: # %atomicrmw.end ; LA64D-NEXT: ret %v = atomicrmw fsub ptr %p, float 1.0 acquire, align 4 @@ -158,14 +158,15 @@ define float @float_fmin_acquire(ptr %p) nounwind { ; LA64F-NEXT: fld.s $fa0, $a0, 0 ; LA64F-NEXT: addi.w $a1, $zero, 1 ; LA64F-NEXT: movgr2fr.w $fa1, $a1 +; LA64F-NEXT: ffint.s.w $fa1, $fa1 ; LA64F-NEXT: .LBB2_1: # %atomicrmw.start ; LA64F-NEXT: # =>This Loop Header: Depth=1 ; LA64F-NEXT: # Child Loop BB2_3 Depth 2 -; LA64F-NEXT: ffint.s.w $fa2, $fa1 -; LA64F-NEXT: fmax.s $fa3, $fa0, $fa0 -; LA64F-NEXT: fmin.s $fa2, $fa3, $fa2 +; LA64F-NEXT: fmax.s $fa2, $fa0, $fa0 +; LA64F-NEXT: fmin.s $fa2, $fa2, $fa1 ; LA64F-NEXT: movfr2gr.s $a1, $fa2 ; LA64F-NEXT: movfr2gr.s $a2, $fa0 +; LA64F-NEXT: addi.w $a2, $a2, 0 ; LA64F-NEXT: .LBB2_3: # %atomicrmw.start ; LA64F-NEXT: # Parent Loop BB2_1 Depth=1 ; LA64F-NEXT: # => This Inner Loop Header: Depth=2 @@ -184,8 +185,7 @@ define float @float_fmin_acquire(ptr %p) nounwind { ; LA64F-NEXT: .LBB2_6: # %atomicrmw.start ; LA64F-NEXT: # in Loop: Header=BB2_1 Depth=1 ; LA64F-NEXT: movgr2fr.w $fa0, $a3 -; LA64F-NEXT: addi.w $a1, $a2, 0 -; LA64F-NEXT: bne $a3, $a1, .LBB2_1 +; LA64F-NEXT: bne $a3, $a2, .LBB2_1 ; LA64F-NEXT: # %bb.2: # %atomicrmw.end ; LA64F-NEXT: ret ; @@ -194,14 +194,15 @@ define float @float_fmin_acquire(ptr %p) nounwind { ; LA64D-NEXT: fld.s $fa0, $a0, 0 ; LA64D-NEXT: addi.w $a1, $zero, 1 ; LA64D-NEXT: movgr2fr.w $fa1, $a1 +; LA64D-NEXT: ffint.s.w $fa1, $fa1 ; LA64D-NEXT: .LBB2_1: # %atomicrmw.start ; LA64D-NEXT: # =>This Loop Header: Depth=1 ; LA64D-NEXT: # Child Loop BB2_3 Depth 2 -; LA64D-NEXT: ffint.s.w $fa2, $fa1 -; LA64D-NEXT: fmax.s $fa3, $fa0, $fa0 -; LA64D-NEXT: fmin.s $fa2, $fa3, $fa2 +; LA64D-NEXT: fmax.s $fa2, $fa0, $fa0 +; LA64D-NEXT: fmin.s $fa2, $fa2, $fa1 ; LA64D-NEXT: movfr2gr.s $a1, $fa2 ; LA64D-NEXT: movfr2gr.s $a2, $fa0 +; LA64D-NEXT: addi.w $a2, $a2, 0 ; LA64D-NEXT: .LBB2_3: # %atomicrmw.start ; LA64D-NEXT: # Parent Loop BB2_1 Depth=1 ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 @@ -220,8 +221,7 @@ define float @float_fmin_acquire(ptr %p) nounwind { ; LA64D-NEXT: .LBB2_6: # %atomicrmw.start ; LA64D-NEXT: # in Loop: Header=BB2_1 Depth=1 ; LA64D-NEXT: movgr2fr.w $fa0, $a3 -; LA64D-NEXT: addi.w $a1, $a2, 0 -; LA64D-NEXT: bne $a3, $a1, .LBB2_1 +; LA64D-NEXT: bne $a3, $a2, .LBB2_1 ; LA64D-NEXT: # %bb.2: # %atomicrmw.end ; LA64D-NEXT: ret %v = atomicrmw fmin ptr %p, float 1.0 acquire, align 4 @@ -234,14 +234,15 @@ define float @float_fmax_acquire(ptr %p) nounwind { ; LA64F-NEXT: fld.s $fa0, $a0, 0 ; LA64F-NEXT: addi.w $a1, $zero, 1 ; LA64F-NEXT: movgr2fr.w $fa1, $a1 +; LA64F-NEXT: ffint.s.w $fa1, $fa1 ; LA64F-NEXT: .LBB3_1: # %atomicrmw.start ; LA64F-NEXT: # =>This Loop Header: Depth=1 ; LA64F-NEXT: # Child Loop BB3_3 Depth 2 -; LA64F-NEXT: ffint.s.w $fa2, $fa1 -; LA64F-NEXT: fmax.s $fa3, $fa0, $fa0 -; LA64F-NEXT: fmax.s $fa2, $fa3, $fa2 +; LA64F-NEXT: fmax.s $fa2, $fa0, $fa0 +; LA64F-NEXT: fmax.s $fa2, $fa2, $fa1 ; LA64F-NEXT: movfr2gr.s $a1, $fa2 ; LA64F-NEXT: movfr2gr.s $a2, $fa0 +; LA64F-NEXT: addi.w $a2, $a2, 0 ; LA64F-NEXT: .LBB3_3: # %atomicrmw.start ; LA64F-NEXT: # Parent Loop BB3_1 Depth=1 ; LA64F-NEXT: # => This Inner Loop Header: 
Depth=2 @@ -260,8 +261,7 @@ define float @float_fmax_acquire(ptr %p) nounwind { ; LA64F-NEXT: .LBB3_6: # %atomicrmw.start ; LA64F-NEXT: # in Loop: Header=BB3_1 Depth=1 ; LA64F-NEXT: movgr2fr.w $fa0, $a3 -; LA64F-NEXT: addi.w $a1, $a2, 0 -; LA64F-NEXT: bne $a3, $a1, .LBB3_1 +; LA64F-NEXT: bne $a3, $a2, .LBB3_1 ; LA64F-NEXT: # %bb.2: # %atomicrmw.end ; LA64F-NEXT: ret ; @@ -270,14 +270,15 @@ define float @float_fmax_acquire(ptr %p) nounwind { ; LA64D-NEXT: fld.s $fa0, $a0, 0 ; LA64D-NEXT: addi.w $a1, $zero, 1 ; LA64D-NEXT: movgr2fr.w $fa1, $a1 +; LA64D-NEXT: ffint.s.w $fa1, $fa1 ; LA64D-NEXT: .LBB3_1: # %atomicrmw.start ; LA64D-NEXT: # =>This Loop Header: Depth=1 ; LA64D-NEXT: # Child Loop BB3_3 Depth 2 -; LA64D-NEXT: ffint.s.w $fa2, $fa1 -; LA64D-NEXT: fmax.s $fa3, $fa0, $fa0 -; LA64D-NEXT: fmax.s $fa2, $fa3, $fa2 +; LA64D-NEXT: fmax.s $fa2, $fa0, $fa0 +; LA64D-NEXT: fmax.s $fa2, $fa2, $fa1 ; LA64D-NEXT: movfr2gr.s $a1, $fa2 ; LA64D-NEXT: movfr2gr.s $a2, $fa0 +; LA64D-NEXT: addi.w $a2, $a2, 0 ; LA64D-NEXT: .LBB3_3: # %atomicrmw.start ; LA64D-NEXT: # Parent Loop BB3_1 Depth=1 ; LA64D-NEXT: # => This Inner Loop Header: Depth=2 @@ -296,8 +297,7 @@ define float @float_fmax_acquire(ptr %p) nounwind { ; LA64D-NEXT: .LBB3_6: # %atomicrmw.start ; LA64D-NEXT: # in Loop: Header=BB3_1 Depth=1 ; LA64D-NEXT: movgr2fr.w $fa0, $a3 -; LA64D-NEXT: addi.w $a1, $a2, 0 -; LA64D-NEXT: bne $a3, $a1, .LBB3_1 +; LA64D-NEXT: bne $a3, $a2, .LBB3_1 ; LA64D-NEXT: # %bb.2: # %atomicrmw.end ; LA64D-NEXT: ret %v = atomicrmw fmax ptr %p, float 1.0 acquire, align 4 @@ -307,43 +307,46 @@ define float @float_fmax_acquire(ptr %p) nounwind { define double @double_fadd_acquire(ptr %p) nounwind { ; LA64F-LABEL: double_fadd_acquire: ; LA64F: # %bb.0: -; LA64F-NEXT: addi.d $sp, $sp, -64 -; LA64F-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill -; LA64F-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill -; LA64F-NEXT: st.d $s0, $sp, 40 # 8-byte Folded Spill -; LA64F-NEXT: st.d $s1, $sp, 32 # 8-byte Folded Spill -; LA64F-NEXT: st.d $s2, $sp, 24 # 8-byte Folded Spill -; LA64F-NEXT: st.d $s3, $sp, 16 # 8-byte Folded Spill +; LA64F-NEXT: addi.d $sp, $sp, -80 +; LA64F-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill +; LA64F-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s4, $sp, 24 # 8-byte Folded Spill ; LA64F-NEXT: move $fp, $a0 ; LA64F-NEXT: ld.d $a0, $a0, 0 -; LA64F-NEXT: ori $s0, $zero, 8 -; LA64F-NEXT: addi.d $s1, $sp, 8 -; LA64F-NEXT: addi.d $s2, $sp, 0 -; LA64F-NEXT: ori $s3, $zero, 2 +; LA64F-NEXT: lu52i.d $s0, $zero, 1023 +; LA64F-NEXT: ori $s1, $zero, 8 +; LA64F-NEXT: addi.d $s2, $sp, 16 +; LA64F-NEXT: addi.d $s3, $sp, 8 +; LA64F-NEXT: ori $s4, $zero, 2 ; LA64F-NEXT: .LBB4_1: # %atomicrmw.start ; LA64F-NEXT: # =>This Inner Loop Header: Depth=1 -; LA64F-NEXT: st.d $a0, $sp, 8 -; LA64F-NEXT: lu52i.d $a1, $zero, 1023 +; LA64F-NEXT: st.d $a0, $sp, 16 +; LA64F-NEXT: move $a1, $s0 ; LA64F-NEXT: bl %plt(__adddf3) -; LA64F-NEXT: st.d $a0, $sp, 0 -; LA64F-NEXT: move $a0, $s0 +; LA64F-NEXT: st.d $a0, $sp, 8 +; LA64F-NEXT: move $a0, $s1 ; LA64F-NEXT: move $a1, $fp -; LA64F-NEXT: move $a2, $s1 -; LA64F-NEXT: move $a3, $s2 -; LA64F-NEXT: move $a4, $s3 -; LA64F-NEXT: move $a5, $s3 +; LA64F-NEXT: move $a2, $s2 +; LA64F-NEXT: move $a3, $s3 +; LA64F-NEXT: move $a4, $s4 +; LA64F-NEXT: move $a5, $s4 ; LA64F-NEXT: bl 
%plt(__atomic_compare_exchange) ; LA64F-NEXT: move $a1, $a0 -; LA64F-NEXT: ld.d $a0, $sp, 8 +; LA64F-NEXT: ld.d $a0, $sp, 16 ; LA64F-NEXT: beqz $a1, .LBB4_1 ; LA64F-NEXT: # %bb.2: # %atomicrmw.end -; LA64F-NEXT: ld.d $s3, $sp, 16 # 8-byte Folded Reload -; LA64F-NEXT: ld.d $s2, $sp, 24 # 8-byte Folded Reload -; LA64F-NEXT: ld.d $s1, $sp, 32 # 8-byte Folded Reload -; LA64F-NEXT: ld.d $s0, $sp, 40 # 8-byte Folded Reload -; LA64F-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload -; LA64F-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload -; LA64F-NEXT: addi.d $sp, $sp, 64 +; LA64F-NEXT: ld.d $s4, $sp, 24 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload +; LA64F-NEXT: addi.d $sp, $sp, 80 ; LA64F-NEXT: ret ; ; LA64D-LABEL: double_fadd_acquire: @@ -359,7 +362,8 @@ define double @double_fadd_acquire(ptr %p) nounwind { ; LA64D-NEXT: move $fp, $a0 ; LA64D-NEXT: fld.d $fa0, $a0, 0 ; LA64D-NEXT: addi.d $a0, $zero, 1 -; LA64D-NEXT: movgr2fr.d $fs0, $a0 +; LA64D-NEXT: movgr2fr.d $fa1, $a0 +; LA64D-NEXT: ffint.d.l $fs0, $fa1 ; LA64D-NEXT: ori $s0, $zero, 8 ; LA64D-NEXT: addi.d $s1, $sp, 16 ; LA64D-NEXT: addi.d $s2, $sp, 8 @@ -367,8 +371,7 @@ define double @double_fadd_acquire(ptr %p) nounwind { ; LA64D-NEXT: .LBB4_1: # %atomicrmw.start ; LA64D-NEXT: # =>This Inner Loop Header: Depth=1 ; LA64D-NEXT: fst.d $fa0, $sp, 16 -; LA64D-NEXT: ffint.d.l $fa1, $fs0 -; LA64D-NEXT: fadd.d $fa0, $fa0, $fa1 +; LA64D-NEXT: fadd.d $fa0, $fa0, $fs0 ; LA64D-NEXT: fst.d $fa0, $sp, 8 ; LA64D-NEXT: move $a0, $s0 ; LA64D-NEXT: move $a1, $fp @@ -396,43 +399,46 @@ define double @double_fadd_acquire(ptr %p) nounwind { define double @double_fsub_acquire(ptr %p) nounwind { ; LA64F-LABEL: double_fsub_acquire: ; LA64F: # %bb.0: -; LA64F-NEXT: addi.d $sp, $sp, -64 -; LA64F-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill -; LA64F-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill -; LA64F-NEXT: st.d $s0, $sp, 40 # 8-byte Folded Spill -; LA64F-NEXT: st.d $s1, $sp, 32 # 8-byte Folded Spill -; LA64F-NEXT: st.d $s2, $sp, 24 # 8-byte Folded Spill -; LA64F-NEXT: st.d $s3, $sp, 16 # 8-byte Folded Spill +; LA64F-NEXT: addi.d $sp, $sp, -80 +; LA64F-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill +; LA64F-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s4, $sp, 24 # 8-byte Folded Spill ; LA64F-NEXT: move $fp, $a0 ; LA64F-NEXT: ld.d $a0, $a0, 0 -; LA64F-NEXT: ori $s0, $zero, 8 -; LA64F-NEXT: addi.d $s1, $sp, 8 -; LA64F-NEXT: addi.d $s2, $sp, 0 -; LA64F-NEXT: ori $s3, $zero, 2 +; LA64F-NEXT: lu52i.d $s0, $zero, -1025 +; LA64F-NEXT: ori $s1, $zero, 8 +; LA64F-NEXT: addi.d $s2, $sp, 16 +; LA64F-NEXT: addi.d $s3, $sp, 8 +; LA64F-NEXT: ori $s4, $zero, 2 ; LA64F-NEXT: .LBB5_1: # %atomicrmw.start ; LA64F-NEXT: # =>This Inner Loop Header: Depth=1 -; LA64F-NEXT: st.d $a0, $sp, 8 -; LA64F-NEXT: lu52i.d $a1, $zero, -1025 +; LA64F-NEXT: st.d $a0, $sp, 16 +; LA64F-NEXT: move $a1, $s0 ; LA64F-NEXT: bl %plt(__adddf3) -; LA64F-NEXT: st.d $a0, $sp, 0 -; LA64F-NEXT: move $a0, $s0 +; LA64F-NEXT: st.d $a0, $sp, 8 +; LA64F-NEXT: move $a0, $s1 ; 
LA64F-NEXT: move $a1, $fp -; LA64F-NEXT: move $a2, $s1 -; LA64F-NEXT: move $a3, $s2 -; LA64F-NEXT: move $a4, $s3 -; LA64F-NEXT: move $a5, $s3 +; LA64F-NEXT: move $a2, $s2 +; LA64F-NEXT: move $a3, $s3 +; LA64F-NEXT: move $a4, $s4 +; LA64F-NEXT: move $a5, $s4 ; LA64F-NEXT: bl %plt(__atomic_compare_exchange) ; LA64F-NEXT: move $a1, $a0 -; LA64F-NEXT: ld.d $a0, $sp, 8 +; LA64F-NEXT: ld.d $a0, $sp, 16 ; LA64F-NEXT: beqz $a1, .LBB5_1 ; LA64F-NEXT: # %bb.2: # %atomicrmw.end -; LA64F-NEXT: ld.d $s3, $sp, 16 # 8-byte Folded Reload -; LA64F-NEXT: ld.d $s2, $sp, 24 # 8-byte Folded Reload -; LA64F-NEXT: ld.d $s1, $sp, 32 # 8-byte Folded Reload -; LA64F-NEXT: ld.d $s0, $sp, 40 # 8-byte Folded Reload -; LA64F-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload -; LA64F-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload -; LA64F-NEXT: addi.d $sp, $sp, 64 +; LA64F-NEXT: ld.d $s4, $sp, 24 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload +; LA64F-NEXT: addi.d $sp, $sp, 80 ; LA64F-NEXT: ret ; ; LA64D-LABEL: double_fsub_acquire: @@ -485,43 +491,46 @@ define double @double_fsub_acquire(ptr %p) nounwind { define double @double_fmin_acquire(ptr %p) nounwind { ; LA64F-LABEL: double_fmin_acquire: ; LA64F: # %bb.0: -; LA64F-NEXT: addi.d $sp, $sp, -64 -; LA64F-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill -; LA64F-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill -; LA64F-NEXT: st.d $s0, $sp, 40 # 8-byte Folded Spill -; LA64F-NEXT: st.d $s1, $sp, 32 # 8-byte Folded Spill -; LA64F-NEXT: st.d $s2, $sp, 24 # 8-byte Folded Spill -; LA64F-NEXT: st.d $s3, $sp, 16 # 8-byte Folded Spill +; LA64F-NEXT: addi.d $sp, $sp, -80 +; LA64F-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill +; LA64F-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s4, $sp, 24 # 8-byte Folded Spill ; LA64F-NEXT: move $fp, $a0 ; LA64F-NEXT: ld.d $a0, $a0, 0 -; LA64F-NEXT: ori $s0, $zero, 8 -; LA64F-NEXT: addi.d $s1, $sp, 8 -; LA64F-NEXT: addi.d $s2, $sp, 0 -; LA64F-NEXT: ori $s3, $zero, 2 +; LA64F-NEXT: lu52i.d $s0, $zero, 1023 +; LA64F-NEXT: ori $s1, $zero, 8 +; LA64F-NEXT: addi.d $s2, $sp, 16 +; LA64F-NEXT: addi.d $s3, $sp, 8 +; LA64F-NEXT: ori $s4, $zero, 2 ; LA64F-NEXT: .LBB6_1: # %atomicrmw.start ; LA64F-NEXT: # =>This Inner Loop Header: Depth=1 -; LA64F-NEXT: st.d $a0, $sp, 8 -; LA64F-NEXT: lu52i.d $a1, $zero, 1023 +; LA64F-NEXT: st.d $a0, $sp, 16 +; LA64F-NEXT: move $a1, $s0 ; LA64F-NEXT: bl %plt(fmin) -; LA64F-NEXT: st.d $a0, $sp, 0 -; LA64F-NEXT: move $a0, $s0 +; LA64F-NEXT: st.d $a0, $sp, 8 +; LA64F-NEXT: move $a0, $s1 ; LA64F-NEXT: move $a1, $fp -; LA64F-NEXT: move $a2, $s1 -; LA64F-NEXT: move $a3, $s2 -; LA64F-NEXT: move $a4, $s3 -; LA64F-NEXT: move $a5, $s3 +; LA64F-NEXT: move $a2, $s2 +; LA64F-NEXT: move $a3, $s3 +; LA64F-NEXT: move $a4, $s4 +; LA64F-NEXT: move $a5, $s4 ; LA64F-NEXT: bl %plt(__atomic_compare_exchange) ; LA64F-NEXT: move $a1, $a0 -; LA64F-NEXT: ld.d $a0, $sp, 8 +; LA64F-NEXT: ld.d $a0, $sp, 16 ; LA64F-NEXT: beqz $a1, .LBB6_1 ; LA64F-NEXT: # %bb.2: # %atomicrmw.end -; LA64F-NEXT: ld.d $s3, $sp, 16 # 
8-byte Folded Reload -; LA64F-NEXT: ld.d $s2, $sp, 24 # 8-byte Folded Reload -; LA64F-NEXT: ld.d $s1, $sp, 32 # 8-byte Folded Reload -; LA64F-NEXT: ld.d $s0, $sp, 40 # 8-byte Folded Reload -; LA64F-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload -; LA64F-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload -; LA64F-NEXT: addi.d $sp, $sp, 64 +; LA64F-NEXT: ld.d $s4, $sp, 24 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload +; LA64F-NEXT: addi.d $sp, $sp, 80 ; LA64F-NEXT: ret ; ; LA64D-LABEL: double_fmin_acquire: @@ -537,7 +546,8 @@ define double @double_fmin_acquire(ptr %p) nounwind { ; LA64D-NEXT: move $fp, $a0 ; LA64D-NEXT: fld.d $fa0, $a0, 0 ; LA64D-NEXT: addi.d $a0, $zero, 1 -; LA64D-NEXT: movgr2fr.d $fs0, $a0 +; LA64D-NEXT: movgr2fr.d $fa1, $a0 +; LA64D-NEXT: ffint.d.l $fs0, $fa1 ; LA64D-NEXT: ori $s0, $zero, 8 ; LA64D-NEXT: addi.d $s1, $sp, 16 ; LA64D-NEXT: addi.d $s2, $sp, 8 @@ -546,8 +556,7 @@ define double @double_fmin_acquire(ptr %p) nounwind { ; LA64D-NEXT: # =>This Inner Loop Header: Depth=1 ; LA64D-NEXT: fst.d $fa0, $sp, 16 ; LA64D-NEXT: fmax.d $fa0, $fa0, $fa0 -; LA64D-NEXT: ffint.d.l $fa1, $fs0 -; LA64D-NEXT: fmin.d $fa0, $fa0, $fa1 +; LA64D-NEXT: fmin.d $fa0, $fa0, $fs0 ; LA64D-NEXT: fst.d $fa0, $sp, 8 ; LA64D-NEXT: move $a0, $s0 ; LA64D-NEXT: move $a1, $fp @@ -575,43 +584,46 @@ define double @double_fmin_acquire(ptr %p) nounwind { define double @double_fmax_acquire(ptr %p) nounwind { ; LA64F-LABEL: double_fmax_acquire: ; LA64F: # %bb.0: -; LA64F-NEXT: addi.d $sp, $sp, -64 -; LA64F-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill -; LA64F-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill -; LA64F-NEXT: st.d $s0, $sp, 40 # 8-byte Folded Spill -; LA64F-NEXT: st.d $s1, $sp, 32 # 8-byte Folded Spill -; LA64F-NEXT: st.d $s2, $sp, 24 # 8-byte Folded Spill -; LA64F-NEXT: st.d $s3, $sp, 16 # 8-byte Folded Spill +; LA64F-NEXT: addi.d $sp, $sp, -80 +; LA64F-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill +; LA64F-NEXT: st.d $fp, $sp, 64 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s2, $sp, 40 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s3, $sp, 32 # 8-byte Folded Spill +; LA64F-NEXT: st.d $s4, $sp, 24 # 8-byte Folded Spill ; LA64F-NEXT: move $fp, $a0 ; LA64F-NEXT: ld.d $a0, $a0, 0 -; LA64F-NEXT: ori $s0, $zero, 8 -; LA64F-NEXT: addi.d $s1, $sp, 8 -; LA64F-NEXT: addi.d $s2, $sp, 0 -; LA64F-NEXT: ori $s3, $zero, 2 +; LA64F-NEXT: lu52i.d $s0, $zero, 1023 +; LA64F-NEXT: ori $s1, $zero, 8 +; LA64F-NEXT: addi.d $s2, $sp, 16 +; LA64F-NEXT: addi.d $s3, $sp, 8 +; LA64F-NEXT: ori $s4, $zero, 2 ; LA64F-NEXT: .LBB7_1: # %atomicrmw.start ; LA64F-NEXT: # =>This Inner Loop Header: Depth=1 -; LA64F-NEXT: st.d $a0, $sp, 8 -; LA64F-NEXT: lu52i.d $a1, $zero, 1023 +; LA64F-NEXT: st.d $a0, $sp, 16 +; LA64F-NEXT: move $a1, $s0 ; LA64F-NEXT: bl %plt(fmax) -; LA64F-NEXT: st.d $a0, $sp, 0 -; LA64F-NEXT: move $a0, $s0 +; LA64F-NEXT: st.d $a0, $sp, 8 +; LA64F-NEXT: move $a0, $s1 ; LA64F-NEXT: move $a1, $fp -; LA64F-NEXT: move $a2, $s1 -; LA64F-NEXT: move $a3, $s2 -; LA64F-NEXT: move $a4, $s3 -; LA64F-NEXT: move $a5, $s3 +; LA64F-NEXT: move $a2, $s2 +; LA64F-NEXT: move $a3, $s3 +; LA64F-NEXT: move $a4, $s4 +; 
LA64F-NEXT: move $a5, $s4 ; LA64F-NEXT: bl %plt(__atomic_compare_exchange) ; LA64F-NEXT: move $a1, $a0 -; LA64F-NEXT: ld.d $a0, $sp, 8 +; LA64F-NEXT: ld.d $a0, $sp, 16 ; LA64F-NEXT: beqz $a1, .LBB7_1 ; LA64F-NEXT: # %bb.2: # %atomicrmw.end -; LA64F-NEXT: ld.d $s3, $sp, 16 # 8-byte Folded Reload -; LA64F-NEXT: ld.d $s2, $sp, 24 # 8-byte Folded Reload -; LA64F-NEXT: ld.d $s1, $sp, 32 # 8-byte Folded Reload -; LA64F-NEXT: ld.d $s0, $sp, 40 # 8-byte Folded Reload -; LA64F-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload -; LA64F-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload -; LA64F-NEXT: addi.d $sp, $sp, 64 +; LA64F-NEXT: ld.d $s4, $sp, 24 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s3, $sp, 32 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s2, $sp, 40 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload +; LA64F-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload +; LA64F-NEXT: addi.d $sp, $sp, 80 ; LA64F-NEXT: ret ; ; LA64D-LABEL: double_fmax_acquire: @@ -627,7 +639,8 @@ define double @double_fmax_acquire(ptr %p) nounwind { ; LA64D-NEXT: move $fp, $a0 ; LA64D-NEXT: fld.d $fa0, $a0, 0 ; LA64D-NEXT: addi.d $a0, $zero, 1 -; LA64D-NEXT: movgr2fr.d $fs0, $a0 +; LA64D-NEXT: movgr2fr.d $fa1, $a0 +; LA64D-NEXT: ffint.d.l $fs0, $fa1 ; LA64D-NEXT: ori $s0, $zero, 8 ; LA64D-NEXT: addi.d $s1, $sp, 16 ; LA64D-NEXT: addi.d $s2, $sp, 8 @@ -636,8 +649,7 @@ define double @double_fmax_acquire(ptr %p) nounwind { ; LA64D-NEXT: # =>This Inner Loop Header: Depth=1 ; LA64D-NEXT: fst.d $fa0, $sp, 16 ; LA64D-NEXT: fmax.d $fa0, $fa0, $fa0 -; LA64D-NEXT: ffint.d.l $fa1, $fs0 -; LA64D-NEXT: fmax.d $fa0, $fa0, $fa1 +; LA64D-NEXT: fmax.d $fa0, $fa0, $fs0 ; LA64D-NEXT: fst.d $fa0, $sp, 8 ; LA64D-NEXT: move $a0, $s0 ; LA64D-NEXT: move $a1, $fp diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/fcmp-dbl.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/fcmp-dbl.ll index 8058f7b0810ce38ea53ae70bf50bbdba76f43edc..3db98d20fbf11d52572b62fae02f5cc030c17801 100644 --- a/llvm/test/CodeGen/LoongArch/ir-instruction/fcmp-dbl.ll +++ b/llvm/test/CodeGen/LoongArch/ir-instruction/fcmp-dbl.ll @@ -268,8 +268,6 @@ define i1 @fcmp_fast_olt(double %a, double %b, i1 %c) nounwind { ; LA32-NEXT: # %bb.1: # %if.then ; LA32-NEXT: ret ; LA32-NEXT: .LBB16_2: # %if.else -; LA32-NEXT: movgr2fr.w $fa1, $zero -; LA32-NEXT: movgr2frh.w $fa1, $zero ; LA32-NEXT: fcmp.clt.d $fcc0, $fa0, $fa1 ; LA32-NEXT: movcf2gr $a0, $fcc0 ; LA32-NEXT: ret @@ -308,9 +306,6 @@ define i1 @fcmp_fast_oeq(double %a, double %b, i1 %c) nounwind { ; LA32-NEXT: # %bb.1: # %if.then ; LA32-NEXT: ret ; LA32-NEXT: .LBB17_2: # %if.else -; LA32-NEXT: movgr2fr.w $fa1, $zero -; LA32-NEXT: movgr2frh.w $fa1, $zero -; LA32-NEXT: fcmp.ceq.d $fcc0, $fa0, $fa1 ; LA32-NEXT: movcf2gr $a0, $fcc0 ; LA32-NEXT: ret ; @@ -346,8 +341,6 @@ define i1 @fcmp_fast_ole(double %a, double %b, i1 %c) nounwind { ; LA32-NEXT: # %bb.1: # %if.then ; LA32-NEXT: ret ; LA32-NEXT: .LBB18_2: # %if.else -; LA32-NEXT: movgr2fr.w $fa1, $zero -; LA32-NEXT: movgr2frh.w $fa1, $zero ; LA32-NEXT: fcmp.cle.d $fcc0, $fa0, $fa1 ; LA32-NEXT: movcf2gr $a0, $fcc0 ; LA32-NEXT: ret diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll index e91d0c145eab6e176081cd536cb65bb01720ba60..3a7ef00e6f9f97c15f57c2e191cda781c75e773a 100644 --- a/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll +++ 
b/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll @@ -72,6 +72,22 @@ define i64 @load_acquire_i64(ptr %ptr) { ret i64 %val } +define ptr @load_acquire_ptr(ptr %ptr) { +; LA32-LABEL: load_acquire_ptr: +; LA32: # %bb.0: +; LA32-NEXT: ld.w $a0, $a0, 0 +; LA32-NEXT: dbar 0 +; LA32-NEXT: ret +; +; LA64-LABEL: load_acquire_ptr: +; LA64: # %bb.0: +; LA64-NEXT: ld.d $a0, $a0, 0 +; LA64-NEXT: dbar 0 +; LA64-NEXT: ret + %val = load atomic ptr, ptr %ptr acquire, align 8 + ret ptr %val +} + define void @store_release_i8(ptr %ptr, i8 signext %v) { ; LA32-LABEL: store_release_i8: ; LA32: # %bb.0: @@ -140,6 +156,21 @@ define void @store_release_i64(ptr %ptr, i64 %v) { ret void } +define void @store_release_ptr(ptr %ptr, ptr %v) { +; LA32-LABEL: store_release_ptr: +; LA32: # %bb.0: +; LA32-NEXT: dbar 0 +; LA32-NEXT: st.w $a1, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: store_release_ptr: +; LA64: # %bb.0: +; LA64-NEXT: amswap_db.d $zero, $a1, $a0 +; LA64-NEXT: ret + store atomic ptr %v, ptr %ptr release, align 8 + ret void +} + define void @store_unordered_i8(ptr %ptr, i8 signext %v) { ; LA32-LABEL: store_unordered_i8: ; LA32: # %bb.0: @@ -203,6 +234,20 @@ define void @store_unordered_i64(ptr %ptr, i64 %v) { ret void } +define void @store_unordered_ptr(ptr %ptr, ptr %v) { +; LA32-LABEL: store_unordered_ptr: +; LA32: # %bb.0: +; LA32-NEXT: st.w $a1, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: store_unordered_ptr: +; LA64: # %bb.0: +; LA64-NEXT: st.d $a1, $a0, 0 +; LA64-NEXT: ret + store atomic ptr %v, ptr %ptr unordered, align 8 + ret void +} + define void @store_monotonic_i8(ptr %ptr, i8 signext %v) { ; LA32-LABEL: store_monotonic_i8: ; LA32: # %bb.0: @@ -266,6 +311,20 @@ define void @store_monotonic_i64(ptr %ptr, i64 %v) { ret void } +define void @store_monotonic_ptr(ptr %ptr, ptr %v) { +; LA32-LABEL: store_monotonic_ptr: +; LA32: # %bb.0: +; LA32-NEXT: st.w $a1, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: store_monotonic_ptr: +; LA64: # %bb.0: +; LA64-NEXT: st.d $a1, $a0, 0 +; LA64-NEXT: ret + store atomic ptr %v, ptr %ptr monotonic, align 8 + ret void +} + define void @store_seq_cst_i8(ptr %ptr, i8 signext %v) { ; LA32-LABEL: store_seq_cst_i8: ; LA32: # %bb.0: @@ -338,3 +397,19 @@ define void @store_seq_cst_i64(ptr %ptr, i64 %v) { store atomic i64 %v, ptr %ptr seq_cst, align 8 ret void } + +define void @store_seq_cst_ptr(ptr %ptr, ptr %v) { +; LA32-LABEL: store_seq_cst_ptr: +; LA32: # %bb.0: +; LA32-NEXT: dbar 0 +; LA32-NEXT: st.w $a1, $a0, 0 +; LA32-NEXT: dbar 0 +; LA32-NEXT: ret +; +; LA64-LABEL: store_seq_cst_ptr: +; LA64: # %bb.0: +; LA64-NEXT: amswap_db.d $zero, $a1, $a0 +; LA64-NEXT: ret + store atomic ptr %v, ptr %ptr seq_cst, align 8 + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll b/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll new file mode 100644 index 0000000000000000000000000000000000000000..ae6f31aaec643470e2a7da19447239f75f772264 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll @@ -0,0 +1,551 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +define void @buildvector_v32i8_splat(ptr %dst, i8 %a0) nounwind { +; CHECK-LABEL: buildvector_v32i8_splat: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvreplgr2vr.b $xr0, $a1 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %insert = insertelement <32 x i8> undef, i8 %a0, i8 0 + %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> 
zeroinitializer + store <32 x i8> %splat, ptr %dst + ret void +} + +define void @buildvector_v16i16_splat(ptr %dst, i16 %a0) nounwind { +; CHECK-LABEL: buildvector_v16i16_splat: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvreplgr2vr.h $xr0, $a1 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %insert = insertelement <16 x i16> undef, i16 %a0, i8 0 + %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer + store <16 x i16> %splat, ptr %dst + ret void +} + +define void @buildvector_v8i32_splat(ptr %dst, i32 %a0) nounwind { +; CHECK-LABEL: buildvector_v8i32_splat: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvreplgr2vr.w $xr0, $a1 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %insert = insertelement <8 x i32> undef, i32 %a0, i8 0 + %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer + store <8 x i32> %splat, ptr %dst + ret void +} + +define void @buildvector_v4i64_splat(ptr %dst, i64 %a0) nounwind { +; CHECK-LABEL: buildvector_v4i64_splat: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvreplgr2vr.d $xr0, $a1 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %insert = insertelement <4 x i64> undef, i64 %a0, i8 0 + %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer + store <4 x i64> %splat, ptr %dst + ret void +} + +define void @buildvector_v8f32_splat(ptr %dst, float %a0) nounwind { +; CHECK-LABEL: buildvector_v8f32_splat: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $f0 killed $f0 def $xr0 +; CHECK-NEXT: xvreplve0.w $xr0, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %insert = insertelement <8 x float> undef, float %a0, i8 0 + %splat = shufflevector <8 x float> %insert, <8 x float> undef, <8 x i32> zeroinitializer + store <8 x float> %splat, ptr %dst + ret void +} + +define void @buildvector_v4f64_splat(ptr %dst, double %a0) nounwind { +; CHECK-LABEL: buildvector_v4f64_splat: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 def $xr0 +; CHECK-NEXT: xvreplve0.d $xr0, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %insert = insertelement <4 x double> undef, double %a0, i8 0 + %splat = shufflevector <4 x double> %insert, <4 x double> undef, <4 x i32> zeroinitializer + store <4 x double> %splat, ptr %dst + ret void +} + +define void @buildvector_v32i8_const_splat(ptr %dst) nounwind { +; CHECK-LABEL: buildvector_v32i8_const_splat: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvrepli.b $xr0, 1 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + store <32 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, ptr %dst + ret void +} + +define void @buildvector_v16i16_const_splat(ptr %dst) nounwind { +; CHECK-LABEL: buildvector_v16i16_const_splat: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvrepli.h $xr0, 1 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + store <16 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, ptr %dst + ret void +} + +define void @buildvector_v8i32_const_splat(ptr %dst) nounwind { +; CHECK-LABEL: buildvector_v8i32_const_splat: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvrepli.w $xr0, 1 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + store <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, ptr %dst + ret void +} + +define void @buildvector_v4i64_const_splat(ptr %dst) nounwind { +; CHECK-LABEL: buildvector_v4i64_const_splat: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvrepli.d $xr0, 1 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + store <4 x i64> <i64 1, i64 1, i64 1, i64 1>, ptr %dst + ret void +} + +define void @buildvector_v2f32_const_splat(ptr %dst) nounwind { +; CHECK-LABEL: buildvector_v2f32_const_splat: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lu12i.w $a1, 260096 +; CHECK-NEXT: xvreplgr2vr.w $xr0, $a1 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + store <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, ptr %dst + ret void +} + +define void @buildvector_v4f64_const_splat(ptr %dst) nounwind { +; CHECK-LABEL: buildvector_v4f64_const_splat: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lu52i.d $a1, $zero, 1023 +; CHECK-NEXT: xvreplgr2vr.d $xr0, $a1 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + store <4 x double> <double 1.0, double 1.0, double 1.0, double 1.0>, ptr %dst + ret void +} + +define void @buildvector_v32i8_const(ptr %dst) nounwind { +; CHECK-LABEL: buildvector_v32i8_const: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI12_0) +; CHECK-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI12_0) +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + store <32 x i8> , ptr %dst + ret void +} + +define void @buildvector_v16i16_const(ptr %dst) nounwind { +; CHECK-LABEL: buildvector_v16i16_const: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI13_0) +; CHECK-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI13_0) +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + store <16 x i16> , ptr %dst + ret void +} + +define void @buildvector_v8i32_const(ptr %dst) nounwind { +; CHECK-LABEL: buildvector_v8i32_const: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI14_0) +; CHECK-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI14_0) +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + store <8 x i32> , ptr %dst + ret void +} + +define void @buildvector_v4i64_const(ptr %dst) nounwind { +; CHECK-LABEL: buildvector_v4i64_const: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI15_0) +; CHECK-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI15_0) +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + store <4 x i64> , ptr %dst + ret void +} + +define void @buildvector_v2f32_const(ptr %dst) nounwind { +; CHECK-LABEL: buildvector_v2f32_const: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI16_0) +; CHECK-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI16_0) +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + store <8 x float> , ptr %dst + ret void +} + +define void @buildvector_v4f64_const(ptr %dst) nounwind { +; CHECK-LABEL: buildvector_v4f64_const: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI17_0) +; CHECK-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI17_0) +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + store <4 x double> , ptr %dst + ret void +} + +define void @buildvector_v32i8(ptr %dst, i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7, i8 %a8, i8 %a9, i8 %a10, i8 %a11, i8 %a12, i8 %a13, i8 %a14, i8 %a15, i8 %a16, i8 %a17, i8 %a18, i8 %a19, i8 %a20, i8 %a21, i8 %a22, i8 %a23, i8 %a24, i8 %a25, i8 %a26, i8 %a27, i8 %a28, i8 %a29, i8 %a30, i8 %a31) nounwind { +; CHECK-LABEL: buildvector_v32i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 0 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a2, 1 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a3, 2 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a4, 3 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a5, 4 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a6, 5 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a7, 6 +; CHECK-NEXT: ld.b $a1, $sp, 0 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 7 +; 
CHECK-NEXT: ld.b $a1, $sp, 8 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 8 +; CHECK-NEXT: ld.b $a1, $sp, 16 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 9 +; CHECK-NEXT: ld.b $a1, $sp, 24 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 10 +; CHECK-NEXT: ld.b $a1, $sp, 32 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 11 +; CHECK-NEXT: ld.b $a1, $sp, 40 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 12 +; CHECK-NEXT: ld.b $a1, $sp, 48 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 13 +; CHECK-NEXT: ld.b $a1, $sp, 56 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 14 +; CHECK-NEXT: ld.b $a1, $sp, 64 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 15 +; CHECK-NEXT: ld.b $a1, $sp, 72 +; CHECK-NEXT: xvori.b $xr1, $xr0, 0 +; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 +; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 0 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: ld.b $a1, $sp, 80 +; CHECK-NEXT: xvori.b $xr1, $xr0, 0 +; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 +; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 1 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: ld.b $a1, $sp, 88 +; CHECK-NEXT: xvori.b $xr1, $xr0, 0 +; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 +; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 2 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: ld.b $a1, $sp, 96 +; CHECK-NEXT: xvori.b $xr1, $xr0, 0 +; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 +; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 3 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: ld.b $a1, $sp, 104 +; CHECK-NEXT: xvori.b $xr1, $xr0, 0 +; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 +; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 4 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: ld.b $a1, $sp, 112 +; CHECK-NEXT: xvori.b $xr1, $xr0, 0 +; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 +; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 5 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: ld.b $a1, $sp, 120 +; CHECK-NEXT: xvori.b $xr1, $xr0, 0 +; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 +; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 6 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: ld.b $a1, $sp, 128 +; CHECK-NEXT: xvori.b $xr1, $xr0, 0 +; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 +; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 7 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: ld.b $a1, $sp, 136 +; CHECK-NEXT: xvori.b $xr1, $xr0, 0 +; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 +; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 8 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: ld.b $a1, $sp, 144 +; CHECK-NEXT: xvori.b $xr1, $xr0, 0 +; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 +; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 9 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: ld.b $a1, $sp, 152 +; CHECK-NEXT: xvori.b $xr1, $xr0, 0 +; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 +; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 10 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: ld.b $a1, $sp, 160 +; CHECK-NEXT: xvori.b $xr1, $xr0, 0 +; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 +; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 11 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: ld.b $a1, $sp, 168 +; CHECK-NEXT: xvori.b $xr1, $xr0, 0 +; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 +; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 12 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: ld.b $a1, $sp, 176 +; CHECK-NEXT: xvori.b $xr1, $xr0, 0 +; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 +; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 13 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: ld.b $a1, $sp, 184 +; CHECK-NEXT: xvori.b $xr1, $xr0, 0 +; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 +; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 14 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: ld.b $a1, $sp, 192 +; CHECK-NEXT: xvori.b $xr1, $xr0, 0 +; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 +; CHECK-NEXT: 
vinsgr2vr.b $vr1, $a1, 15 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %ins0 = insertelement <32 x i8> undef, i8 %a0, i32 0 + %ins1 = insertelement <32 x i8> %ins0, i8 %a1, i32 1 + %ins2 = insertelement <32 x i8> %ins1, i8 %a2, i32 2 + %ins3 = insertelement <32 x i8> %ins2, i8 %a3, i32 3 + %ins4 = insertelement <32 x i8> %ins3, i8 %a4, i32 4 + %ins5 = insertelement <32 x i8> %ins4, i8 %a5, i32 5 + %ins6 = insertelement <32 x i8> %ins5, i8 %a6, i32 6 + %ins7 = insertelement <32 x i8> %ins6, i8 %a7, i32 7 + %ins8 = insertelement <32 x i8> %ins7, i8 %a8, i32 8 + %ins9 = insertelement <32 x i8> %ins8, i8 %a9, i32 9 + %ins10 = insertelement <32 x i8> %ins9, i8 %a10, i32 10 + %ins11 = insertelement <32 x i8> %ins10, i8 %a11, i32 11 + %ins12 = insertelement <32 x i8> %ins11, i8 %a12, i32 12 + %ins13 = insertelement <32 x i8> %ins12, i8 %a13, i32 13 + %ins14 = insertelement <32 x i8> %ins13, i8 %a14, i32 14 + %ins15 = insertelement <32 x i8> %ins14, i8 %a15, i32 15 + %ins16 = insertelement <32 x i8> %ins15, i8 %a16, i32 16 + %ins17 = insertelement <32 x i8> %ins16, i8 %a17, i32 17 + %ins18 = insertelement <32 x i8> %ins17, i8 %a18, i32 18 + %ins19 = insertelement <32 x i8> %ins18, i8 %a19, i32 19 + %ins20 = insertelement <32 x i8> %ins19, i8 %a20, i32 20 + %ins21 = insertelement <32 x i8> %ins20, i8 %a21, i32 21 + %ins22 = insertelement <32 x i8> %ins21, i8 %a22, i32 22 + %ins23 = insertelement <32 x i8> %ins22, i8 %a23, i32 23 + %ins24 = insertelement <32 x i8> %ins23, i8 %a24, i32 24 + %ins25 = insertelement <32 x i8> %ins24, i8 %a25, i32 25 + %ins26 = insertelement <32 x i8> %ins25, i8 %a26, i32 26 + %ins27 = insertelement <32 x i8> %ins26, i8 %a27, i32 27 + %ins28 = insertelement <32 x i8> %ins27, i8 %a28, i32 28 + %ins29 = insertelement <32 x i8> %ins28, i8 %a29, i32 29 + %ins30 = insertelement <32 x i8> %ins29, i8 %a30, i32 30 + %ins31 = insertelement <32 x i8> %ins30, i8 %a31, i32 31 + store <32 x i8> %ins31, ptr %dst + ret void +} + +define void @buildvector_v16i16(ptr %dst, i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4, i16 %a5, i16 %a6, i16 %a7, i16 %a8, i16 %a9, i16 %a10, i16 %a11, i16 %a12, i16 %a13, i16 %a14, i16 %a15) nounwind { +; CHECK-LABEL: buildvector_v16i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 0 +; CHECK-NEXT: vinsgr2vr.h $vr0, $a2, 1 +; CHECK-NEXT: vinsgr2vr.h $vr0, $a3, 2 +; CHECK-NEXT: vinsgr2vr.h $vr0, $a4, 3 +; CHECK-NEXT: vinsgr2vr.h $vr0, $a5, 4 +; CHECK-NEXT: vinsgr2vr.h $vr0, $a6, 5 +; CHECK-NEXT: vinsgr2vr.h $vr0, $a7, 6 +; CHECK-NEXT: ld.h $a1, $sp, 0 +; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 7 +; CHECK-NEXT: ld.h $a1, $sp, 8 +; CHECK-NEXT: xvori.b $xr1, $xr0, 0 +; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 +; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 0 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: ld.h $a1, $sp, 16 +; CHECK-NEXT: xvori.b $xr1, $xr0, 0 +; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 +; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 1 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: ld.h $a1, $sp, 24 +; CHECK-NEXT: xvori.b $xr1, $xr0, 0 +; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 +; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 2 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: ld.h $a1, $sp, 32 +; CHECK-NEXT: xvori.b $xr1, $xr0, 0 +; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 +; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 3 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: ld.h $a1, $sp, 40 +; CHECK-NEXT: xvori.b $xr1, $xr0, 0 +; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 +; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 4 +; 
CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: ld.h $a1, $sp, 48 +; CHECK-NEXT: xvori.b $xr1, $xr0, 0 +; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 +; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 5 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: ld.h $a1, $sp, 56 +; CHECK-NEXT: xvori.b $xr1, $xr0, 0 +; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 +; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 6 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: ld.h $a1, $sp, 64 +; CHECK-NEXT: xvori.b $xr1, $xr0, 0 +; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1 +; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 7 +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %ins0 = insertelement <16 x i16> undef, i16 %a0, i32 0 + %ins1 = insertelement <16 x i16> %ins0, i16 %a1, i32 1 + %ins2 = insertelement <16 x i16> %ins1, i16 %a2, i32 2 + %ins3 = insertelement <16 x i16> %ins2, i16 %a3, i32 3 + %ins4 = insertelement <16 x i16> %ins3, i16 %a4, i32 4 + %ins5 = insertelement <16 x i16> %ins4, i16 %a5, i32 5 + %ins6 = insertelement <16 x i16> %ins5, i16 %a6, i32 6 + %ins7 = insertelement <16 x i16> %ins6, i16 %a7, i32 7 + %ins8 = insertelement <16 x i16> %ins7, i16 %a8, i32 8 + %ins9 = insertelement <16 x i16> %ins8, i16 %a9, i32 9 + %ins10 = insertelement <16 x i16> %ins9, i16 %a10, i32 10 + %ins11 = insertelement <16 x i16> %ins10, i16 %a11, i32 11 + %ins12 = insertelement <16 x i16> %ins11, i16 %a12, i32 12 + %ins13 = insertelement <16 x i16> %ins12, i16 %a13, i32 13 + %ins14 = insertelement <16 x i16> %ins13, i16 %a14, i32 14 + %ins15 = insertelement <16 x i16> %ins14, i16 %a15, i32 15 + store <16 x i16> %ins15, ptr %dst + ret void +} + +define void @buildvector_v8i32(ptr %dst, i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7) nounwind { +; CHECK-LABEL: buildvector_v8i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvinsgr2vr.w $xr0, $a1, 0 +; CHECK-NEXT: xvinsgr2vr.w $xr0, $a2, 1 +; CHECK-NEXT: xvinsgr2vr.w $xr0, $a3, 2 +; CHECK-NEXT: xvinsgr2vr.w $xr0, $a4, 3 +; CHECK-NEXT: xvinsgr2vr.w $xr0, $a5, 4 +; CHECK-NEXT: xvinsgr2vr.w $xr0, $a6, 5 +; CHECK-NEXT: xvinsgr2vr.w $xr0, $a7, 6 +; CHECK-NEXT: ld.w $a1, $sp, 0 +; CHECK-NEXT: xvinsgr2vr.w $xr0, $a1, 7 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %ins0 = insertelement <8 x i32> undef, i32 %a0, i32 0 + %ins1 = insertelement <8 x i32> %ins0, i32 %a1, i32 1 + %ins2 = insertelement <8 x i32> %ins1, i32 %a2, i32 2 + %ins3 = insertelement <8 x i32> %ins2, i32 %a3, i32 3 + %ins4 = insertelement <8 x i32> %ins3, i32 %a4, i32 4 + %ins5 = insertelement <8 x i32> %ins4, i32 %a5, i32 5 + %ins6 = insertelement <8 x i32> %ins5, i32 %a6, i32 6 + %ins7 = insertelement <8 x i32> %ins6, i32 %a7, i32 7 + store <8 x i32> %ins7, ptr %dst + ret void +} + +define void @buildvector_v4i64(ptr %dst, i64 %a0, i64 %a1, i64 %a2, i64 %a3) nounwind { +; CHECK-LABEL: buildvector_v4i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvinsgr2vr.d $xr0, $a1, 0 +; CHECK-NEXT: xvinsgr2vr.d $xr0, $a2, 1 +; CHECK-NEXT: xvinsgr2vr.d $xr0, $a3, 2 +; CHECK-NEXT: xvinsgr2vr.d $xr0, $a4, 3 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %ins0 = insertelement <4 x i64> undef, i64 %a0, i32 0 + %ins1 = insertelement <4 x i64> %ins0, i64 %a1, i32 1 + %ins2 = insertelement <4 x i64> %ins1, i64 %a2, i32 2 + %ins3 = insertelement <4 x i64> %ins2, i64 %a3, i32 3 + store <4 x i64> %ins3, ptr %dst + ret void +} + +define void @buildvector_v8f32(ptr %dst, float %a0, float %a1, float %a2, float %a3, float %a4, float %a5, float %a6, float %a7) nounwind { +; 
CHECK-LABEL: buildvector_v8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movfr2gr.s $a1, $fa0 +; CHECK-NEXT: xvinsgr2vr.w $xr0, $a1, 0 +; CHECK-NEXT: movfr2gr.s $a1, $fa1 +; CHECK-NEXT: xvinsgr2vr.w $xr0, $a1, 1 +; CHECK-NEXT: movfr2gr.s $a1, $fa2 +; CHECK-NEXT: xvinsgr2vr.w $xr0, $a1, 2 +; CHECK-NEXT: movfr2gr.s $a1, $fa3 +; CHECK-NEXT: xvinsgr2vr.w $xr0, $a1, 3 +; CHECK-NEXT: movfr2gr.s $a1, $fa4 +; CHECK-NEXT: xvinsgr2vr.w $xr0, $a1, 4 +; CHECK-NEXT: movfr2gr.s $a1, $fa5 +; CHECK-NEXT: xvinsgr2vr.w $xr0, $a1, 5 +; CHECK-NEXT: movfr2gr.s $a1, $fa6 +; CHECK-NEXT: xvinsgr2vr.w $xr0, $a1, 6 +; CHECK-NEXT: movfr2gr.s $a1, $fa7 +; CHECK-NEXT: xvinsgr2vr.w $xr0, $a1, 7 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %ins0 = insertelement <8 x float> undef, float %a0, i32 0 + %ins1 = insertelement <8 x float> %ins0, float %a1, i32 1 + %ins2 = insertelement <8 x float> %ins1, float %a2, i32 2 + %ins3 = insertelement <8 x float> %ins2, float %a3, i32 3 + %ins4 = insertelement <8 x float> %ins3, float %a4, i32 4 + %ins5 = insertelement <8 x float> %ins4, float %a5, i32 5 + %ins6 = insertelement <8 x float> %ins5, float %a6, i32 6 + %ins7 = insertelement <8 x float> %ins6, float %a7, i32 7 + store <8 x float> %ins7, ptr %dst + ret void +} + +define void @buildvector_v4f64(ptr %dst, double %a0, double %a1, double %a2, double %a3) nounwind { +; CHECK-LABEL: buildvector_v4f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movfr2gr.d $a1, $fa0 +; CHECK-NEXT: xvinsgr2vr.d $xr0, $a1, 0 +; CHECK-NEXT: movfr2gr.d $a1, $fa1 +; CHECK-NEXT: xvinsgr2vr.d $xr0, $a1, 1 +; CHECK-NEXT: movfr2gr.d $a1, $fa2 +; CHECK-NEXT: xvinsgr2vr.d $xr0, $a1, 2 +; CHECK-NEXT: movfr2gr.d $a1, $fa3 +; CHECK-NEXT: xvinsgr2vr.d $xr0, $a1, 3 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %ins0 = insertelement <4 x double> undef, double %a0, i32 0 + %ins1 = insertelement <4 x double> %ins0, double %a1, i32 1 + %ins2 = insertelement <4 x double> %ins1, double %a2, i32 2 + %ins3 = insertelement <4 x double> %ins2, double %a3, i32 3 + store <4 x double> %ins3, ptr %dst + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/ctpop-ctlz.ll b/llvm/test/CodeGen/LoongArch/lasx/ctpop-ctlz.ll new file mode 100644 index 0000000000000000000000000000000000000000..7786e399c95f40e5a0ccb99b9b922d6304d71724 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/ctpop-ctlz.ll @@ -0,0 +1,115 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +define void @ctpop_v32i8(ptr %src, ptr %dst) nounwind { +; CHECK-LABEL: ctpop_v32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a0, 0 +; CHECK-NEXT: xvpcnt.b $xr0, $xr0 +; CHECK-NEXT: xvst $xr0, $a1, 0 +; CHECK-NEXT: ret + %v = load <32 x i8>, ptr %src + %res = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %v) + store <32 x i8> %res, ptr %dst + ret void +} + +define void @ctpop_v16i16(ptr %src, ptr %dst) nounwind { +; CHECK-LABEL: ctpop_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a0, 0 +; CHECK-NEXT: xvpcnt.h $xr0, $xr0 +; CHECK-NEXT: xvst $xr0, $a1, 0 +; CHECK-NEXT: ret + %v = load <16 x i16>, ptr %src + %res = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %v) + store <16 x i16> %res, ptr %dst + ret void +} + +define void @ctpop_v8i32(ptr %src, ptr %dst) nounwind { +; CHECK-LABEL: ctpop_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a0, 0 +; CHECK-NEXT: xvpcnt.w $xr0, $xr0 +; CHECK-NEXT: xvst $xr0, $a1, 0 +; CHECK-NEXT: ret + %v = load <8 x i32>, ptr %src + %res = call 
<8 x i32> @llvm.ctpop.v8i32(<8 x i32> %v) + store <8 x i32> %res, ptr %dst + ret void +} + +define void @ctpop_v4i64(ptr %src, ptr %dst) nounwind { +; CHECK-LABEL: ctpop_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a0, 0 +; CHECK-NEXT: xvpcnt.d $xr0, $xr0 +; CHECK-NEXT: xvst $xr0, $a1, 0 +; CHECK-NEXT: ret + %v = load <4 x i64>, ptr %src + %res = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %v) + store <4 x i64> %res, ptr %dst + ret void +} + +define void @ctlz_v32i8(ptr %src, ptr %dst) nounwind { +; CHECK-LABEL: ctlz_v32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a0, 0 +; CHECK-NEXT: xvclz.b $xr0, $xr0 +; CHECK-NEXT: xvst $xr0, $a1, 0 +; CHECK-NEXT: ret + %v = load <32 x i8>, ptr %src + %res = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %v, i1 false) + store <32 x i8> %res, ptr %dst + ret void +} + +define void @ctlz_v16i16(ptr %src, ptr %dst) nounwind { +; CHECK-LABEL: ctlz_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a0, 0 +; CHECK-NEXT: xvclz.h $xr0, $xr0 +; CHECK-NEXT: xvst $xr0, $a1, 0 +; CHECK-NEXT: ret + %v = load <16 x i16>, ptr %src + %res = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %v, i1 false) + store <16 x i16> %res, ptr %dst + ret void +} + +define void @ctlz_v8i32(ptr %src, ptr %dst) nounwind { +; CHECK-LABEL: ctlz_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a0, 0 +; CHECK-NEXT: xvclz.w $xr0, $xr0 +; CHECK-NEXT: xvst $xr0, $a1, 0 +; CHECK-NEXT: ret + %v = load <8 x i32>, ptr %src + %res = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %v, i1 false) + store <8 x i32> %res, ptr %dst + ret void +} + +define void @ctlz_v4i64(ptr %src, ptr %dst) nounwind { +; CHECK-LABEL: ctlz_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a0, 0 +; CHECK-NEXT: xvclz.d $xr0, $xr0 +; CHECK-NEXT: xvst $xr0, $a1, 0 +; CHECK-NEXT: ret + %v = load <4 x i64>, ptr %src + %res = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %v, i1 false) + store <4 x i64> %res, ptr %dst + ret void +} + +declare <32 x i8> @llvm.ctpop.v32i8(<32 x i8>) +declare <16 x i16> @llvm.ctpop.v16i16(<16 x i16>) +declare <8 x i32> @llvm.ctpop.v8i32(<8 x i32>) +declare <4 x i64> @llvm.ctpop.v4i64(<4 x i64>) +declare <32 x i8> @llvm.ctlz.v32i8(<32 x i8>, i1) +declare <16 x i16> @llvm.ctlz.v16i16(<16 x i16>, i1) +declare <8 x i32> @llvm.ctlz.v8i32(<8 x i32>, i1) +declare <4 x i64> @llvm.ctlz.v4i64(<4 x i64>, i1) diff --git a/llvm/test/CodeGen/LoongArch/lasx/fma-v4f64.ll b/llvm/test/CodeGen/LoongArch/lasx/fma-v4f64.ll new file mode 100644 index 0000000000000000000000000000000000000000..af18c52b096c801a846c741219942d5bdf2db3b8 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/fma-v4f64.ll @@ -0,0 +1,804 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx --fp-contract=fast < %s \ +; RUN: | FileCheck %s --check-prefix=CONTRACT-FAST +; RUN: llc --mtriple=loongarch64 --mattr=+lasx --fp-contract=on < %s \ +; RUN: | FileCheck %s --check-prefix=CONTRACT-ON +; RUN: llc --mtriple=loongarch64 --mattr=+lasx --fp-contract=off < %s \ +; RUN: | FileCheck %s --check-prefix=CONTRACT-OFF + +define void @xvfmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: xvfmadd_d: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-FAST-NEXT: xvfmadd.d $xr0, $xr2, $xr1, $xr0 +; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: xvfmadd_d: +; CONTRACT-ON: # %bb.0: # %entry +; 
CONTRACT-ON-NEXT: xvld $xr0, $a2, 0 +; CONTRACT-ON-NEXT: xvld $xr1, $a1, 0 +; CONTRACT-ON-NEXT: xvfmul.d $xr0, $xr1, $xr0 +; CONTRACT-ON-NEXT: xvld $xr1, $a3, 0 +; CONTRACT-ON-NEXT: xvfadd.d $xr0, $xr0, $xr1 +; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: xvfmadd_d: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: xvld $xr0, $a2, 0 +; CONTRACT-OFF-NEXT: xvld $xr1, $a1, 0 +; CONTRACT-OFF-NEXT: xvfmul.d $xr0, $xr1, $xr0 +; CONTRACT-OFF-NEXT: xvld $xr1, $a3, 0 +; CONTRACT-OFF-NEXT: xvfadd.d $xr0, $xr0, $xr1 +; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <4 x double>, ptr %a0 + %v1 = load <4 x double>, ptr %a1 + %v2 = load <4 x double>, ptr %a2 + %mul = fmul<4 x double> %v0, %v1 + %add = fadd<4 x double> %mul, %v2 + store <4 x double> %add, ptr %res + ret void +} + +define void @xvfmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: xvfmsub_d: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-FAST-NEXT: xvfmsub.d $xr0, $xr2, $xr1, $xr0 +; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: xvfmsub_d: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: xvld $xr0, $a2, 0 +; CONTRACT-ON-NEXT: xvld $xr1, $a1, 0 +; CONTRACT-ON-NEXT: xvfmul.d $xr0, $xr1, $xr0 +; CONTRACT-ON-NEXT: xvld $xr1, $a3, 0 +; CONTRACT-ON-NEXT: xvfsub.d $xr0, $xr0, $xr1 +; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: xvfmsub_d: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: xvld $xr0, $a2, 0 +; CONTRACT-OFF-NEXT: xvld $xr1, $a1, 0 +; CONTRACT-OFF-NEXT: xvfmul.d $xr0, $xr1, $xr0 +; CONTRACT-OFF-NEXT: xvld $xr1, $a3, 0 +; CONTRACT-OFF-NEXT: xvfsub.d $xr0, $xr0, $xr1 +; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <4 x double>, ptr %a0 + %v1 = load <4 x double>, ptr %a1 + %v2 = load <4 x double>, ptr %a2 + %mul = fmul<4 x double> %v0, %v1 + %sub = fsub<4 x double> %mul, %v2 + store <4 x double> %sub, ptr %res + ret void +} + +define void @xvfnmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: xvfnmadd_d: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-FAST-NEXT: xvfnmadd.d $xr0, $xr2, $xr1, $xr0 +; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: xvfnmadd_d: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: xvld $xr0, $a2, 0 +; CONTRACT-ON-NEXT: xvld $xr1, $a1, 0 +; CONTRACT-ON-NEXT: xvfmul.d $xr0, $xr1, $xr0 +; CONTRACT-ON-NEXT: xvld $xr1, $a3, 0 +; CONTRACT-ON-NEXT: xvfadd.d $xr0, $xr0, $xr1 +; CONTRACT-ON-NEXT: xvbitrevi.d $xr0, $xr0, 63 +; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: xvfnmadd_d: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: xvld $xr0, $a2, 0 +; CONTRACT-OFF-NEXT: xvld $xr1, $a1, 0 +; CONTRACT-OFF-NEXT: xvfmul.d $xr0, $xr1, $xr0 +; CONTRACT-OFF-NEXT: xvld $xr1, $a3, 0 +; CONTRACT-OFF-NEXT: xvfadd.d $xr0, $xr0, $xr1 +; CONTRACT-OFF-NEXT: xvbitrevi.d $xr0, $xr0, 63 +; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <4 x double>, ptr %a0 + %v1 = load <4 x double>, ptr %a1 + %v2 = load <4 x double>, ptr %a2 + %mul = fmul<4 x double> %v0, %v1 + %add = fadd<4 x double> 
%mul, %v2 + %negadd = fneg<4 x double> %add + store <4 x double> %negadd, ptr %res + ret void +} + +define void @xvfnmadd_d_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: xvfnmadd_d_nsz: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-FAST-NEXT: xvfnmadd.d $xr0, $xr2, $xr1, $xr0 +; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: xvfnmadd_d_nsz: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: xvld $xr0, $a2, 0 +; CONTRACT-ON-NEXT: xvld $xr1, $a1, 0 +; CONTRACT-ON-NEXT: xvbitrevi.d $xr1, $xr1, 63 +; CONTRACT-ON-NEXT: xvfmul.d $xr0, $xr1, $xr0 +; CONTRACT-ON-NEXT: xvld $xr1, $a3, 0 +; CONTRACT-ON-NEXT: xvfsub.d $xr0, $xr0, $xr1 +; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: xvfnmadd_d_nsz: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: xvld $xr0, $a2, 0 +; CONTRACT-OFF-NEXT: xvld $xr1, $a1, 0 +; CONTRACT-OFF-NEXT: xvbitrevi.d $xr1, $xr1, 63 +; CONTRACT-OFF-NEXT: xvfmul.d $xr0, $xr1, $xr0 +; CONTRACT-OFF-NEXT: xvld $xr1, $a3, 0 +; CONTRACT-OFF-NEXT: xvfsub.d $xr0, $xr0, $xr1 +; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <4 x double>, ptr %a0 + %v1 = load <4 x double>, ptr %a1 + %v2 = load <4 x double>, ptr %a2 + %negv0 = fneg nsz<4 x double> %v0 + %negv2 = fneg nsz<4 x double> %v2 + %mul = fmul nsz<4 x double> %negv0, %v1 + %add = fadd nsz<4 x double> %mul, %negv2 + store <4 x double> %add, ptr %res + ret void +} + +;; Check that xvfnmadd.d is not emitted. +define void @not_xvfnmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: not_xvfnmadd_d: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-FAST-NEXT: xvbitrevi.d $xr2, $xr2, 63 +; CONTRACT-FAST-NEXT: xvfmsub.d $xr0, $xr2, $xr1, $xr0 +; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: not_xvfnmadd_d: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: xvld $xr0, $a2, 0 +; CONTRACT-ON-NEXT: xvld $xr1, $a1, 0 +; CONTRACT-ON-NEXT: xvbitrevi.d $xr1, $xr1, 63 +; CONTRACT-ON-NEXT: xvfmul.d $xr0, $xr1, $xr0 +; CONTRACT-ON-NEXT: xvld $xr1, $a3, 0 +; CONTRACT-ON-NEXT: xvfsub.d $xr0, $xr0, $xr1 +; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: not_xvfnmadd_d: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: xvld $xr0, $a2, 0 +; CONTRACT-OFF-NEXT: xvld $xr1, $a1, 0 +; CONTRACT-OFF-NEXT: xvbitrevi.d $xr1, $xr1, 63 +; CONTRACT-OFF-NEXT: xvfmul.d $xr0, $xr1, $xr0 +; CONTRACT-OFF-NEXT: xvld $xr1, $a3, 0 +; CONTRACT-OFF-NEXT: xvfsub.d $xr0, $xr0, $xr1 +; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <4 x double>, ptr %a0 + %v1 = load <4 x double>, ptr %a1 + %v2 = load <4 x double>, ptr %a2 + %negv0 = fneg<4 x double> %v0 + %negv2 = fneg<4 x double> %v2 + %mul = fmul<4 x double> %negv0, %v1 + %add = fadd<4 x double> %mul, %negv2 + store <4 x double> %add, ptr %res + ret void +} + +define void @xvfnmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: xvfnmsub_d: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-FAST-NEXT: xvfnmsub.d $xr0, $xr2, $xr1, 
$xr0 +; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: xvfnmsub_d: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: xvld $xr0, $a2, 0 +; CONTRACT-ON-NEXT: xvld $xr1, $a1, 0 +; CONTRACT-ON-NEXT: xvfmul.d $xr0, $xr1, $xr0 +; CONTRACT-ON-NEXT: xvld $xr1, $a3, 0 +; CONTRACT-ON-NEXT: xvfsub.d $xr0, $xr0, $xr1 +; CONTRACT-ON-NEXT: xvbitrevi.d $xr0, $xr0, 63 +; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: xvfnmsub_d: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: xvld $xr0, $a2, 0 +; CONTRACT-OFF-NEXT: xvld $xr1, $a1, 0 +; CONTRACT-OFF-NEXT: xvfmul.d $xr0, $xr1, $xr0 +; CONTRACT-OFF-NEXT: xvld $xr1, $a3, 0 +; CONTRACT-OFF-NEXT: xvfsub.d $xr0, $xr0, $xr1 +; CONTRACT-OFF-NEXT: xvbitrevi.d $xr0, $xr0, 63 +; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <4 x double>, ptr %a0 + %v1 = load <4 x double>, ptr %a1 + %v2 = load <4 x double>, ptr %a2 + %negv2 = fneg<4 x double> %v2 + %mul = fmul<4 x double> %v0, %v1 + %add = fadd<4 x double> %mul, %negv2 + %neg = fneg<4 x double> %add + store <4 x double> %neg, ptr %res + ret void +} + +define void @xvfnmsub_d_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: xvfnmsub_d_nsz: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-FAST-NEXT: xvfnmsub.d $xr0, $xr2, $xr1, $xr0 +; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: xvfnmsub_d_nsz: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: xvld $xr0, $a2, 0 +; CONTRACT-ON-NEXT: xvld $xr1, $a1, 0 +; CONTRACT-ON-NEXT: xvfmul.d $xr0, $xr1, $xr0 +; CONTRACT-ON-NEXT: xvld $xr1, $a3, 0 +; CONTRACT-ON-NEXT: xvfsub.d $xr0, $xr1, $xr0 +; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: xvfnmsub_d_nsz: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: xvld $xr0, $a2, 0 +; CONTRACT-OFF-NEXT: xvld $xr1, $a1, 0 +; CONTRACT-OFF-NEXT: xvfmul.d $xr0, $xr1, $xr0 +; CONTRACT-OFF-NEXT: xvld $xr1, $a3, 0 +; CONTRACT-OFF-NEXT: xvfsub.d $xr0, $xr1, $xr0 +; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <4 x double>, ptr %a0 + %v1 = load <4 x double>, ptr %a1 + %v2 = load <4 x double>, ptr %a2 + %negv0 = fneg nsz<4 x double> %v0 + %mul = fmul nsz<4 x double> %negv0, %v1 + %add = fadd nsz<4 x double> %mul, %v2 + store <4 x double> %add, ptr %res + ret void +} + +;; Check that xvfnmsub.d is not emitted. 
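+;; (-a)*b + c equals -(a*b - c) only when the sign of zero may be ignored: +;; for a*b == c == +0.0 the expression below yields +0.0 while xvfnmsub.d +;; would yield -0.0, so without 'nsz' the fold is blocked.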
+define void @not_xvfnmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: not_xvfnmsub_d: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-FAST-NEXT: xvbitrevi.d $xr2, $xr2, 63 +; CONTRACT-FAST-NEXT: xvfmadd.d $xr0, $xr2, $xr1, $xr0 +; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: not_xvfnmsub_d: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: xvld $xr0, $a2, 0 +; CONTRACT-ON-NEXT: xvld $xr1, $a1, 0 +; CONTRACT-ON-NEXT: xvfmul.d $xr0, $xr1, $xr0 +; CONTRACT-ON-NEXT: xvld $xr1, $a3, 0 +; CONTRACT-ON-NEXT: xvfsub.d $xr0, $xr1, $xr0 +; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: not_xvfnmsub_d: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: xvld $xr0, $a2, 0 +; CONTRACT-OFF-NEXT: xvld $xr1, $a1, 0 +; CONTRACT-OFF-NEXT: xvfmul.d $xr0, $xr1, $xr0 +; CONTRACT-OFF-NEXT: xvld $xr1, $a3, 0 +; CONTRACT-OFF-NEXT: xvfsub.d $xr0, $xr1, $xr0 +; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <4 x double>, ptr %a0 + %v1 = load <4 x double>, ptr %a1 + %v2 = load <4 x double>, ptr %a2 + %negv0 = fneg<4 x double> %v0 + %mul = fmul<4 x double> %negv0, %v1 + %add = fadd<4 x double> %mul, %v2 + store <4 x double> %add, ptr %res + ret void +} + +define void @contract_xvfmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: contract_xvfmadd_d: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-FAST-NEXT: xvfmadd.d $xr0, $xr2, $xr1, $xr0 +; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: contract_xvfmadd_d: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-ON-NEXT: xvfmadd.d $xr0, $xr2, $xr1, $xr0 +; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: contract_xvfmadd_d: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-OFF-NEXT: xvfmadd.d $xr0, $xr2, $xr1, $xr0 +; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <4 x double>, ptr %a0 + %v1 = load <4 x double>, ptr %a1 + %v2 = load <4 x double>, ptr %a2 + %mul = fmul contract <4 x double> %v0, %v1 + %add = fadd contract <4 x double> %mul, %v2 + store <4 x double> %add, ptr %res + ret void +} + +define void @contract_xvfmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: contract_xvfmsub_d: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-FAST-NEXT: xvfmsub.d $xr0, $xr2, $xr1, $xr0 +; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: contract_xvfmsub_d: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-ON-NEXT: xvfmsub.d $xr0, $xr2, $xr1, $xr0 +; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: contract_xvfmsub_d: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: 
xvld $xr0, $a3, 0 +; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-OFF-NEXT: xvfmsub.d $xr0, $xr2, $xr1, $xr0 +; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <4 x double>, ptr %a0 + %v1 = load <4 x double>, ptr %a1 + %v2 = load <4 x double>, ptr %a2 + %mul = fmul contract <4 x double> %v0, %v1 + %sub = fsub contract <4 x double> %mul, %v2 + store <4 x double> %sub, ptr %res + ret void +} + +define void @contract_xvfnmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: contract_xvfnmadd_d: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-FAST-NEXT: xvfnmadd.d $xr0, $xr2, $xr1, $xr0 +; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: contract_xvfnmadd_d: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-ON-NEXT: xvfnmadd.d $xr0, $xr2, $xr1, $xr0 +; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: contract_xvfnmadd_d: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-OFF-NEXT: xvfnmadd.d $xr0, $xr2, $xr1, $xr0 +; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <4 x double>, ptr %a0 + %v1 = load <4 x double>, ptr %a1 + %v2 = load <4 x double>, ptr %a2 + %mul = fmul contract <4 x double> %v0, %v1 + %add = fadd contract <4 x double> %mul, %v2 + %negadd = fneg contract <4 x double> %add + store <4 x double> %negadd, ptr %res + ret void +} + +define void @contract_xvfnmadd_d_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: contract_xvfnmadd_d_nsz: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-FAST-NEXT: xvfnmadd.d $xr0, $xr2, $xr1, $xr0 +; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: contract_xvfnmadd_d_nsz: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-ON-NEXT: xvfnmadd.d $xr0, $xr2, $xr1, $xr0 +; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: contract_xvfnmadd_d_nsz: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-OFF-NEXT: xvfnmadd.d $xr0, $xr2, $xr1, $xr0 +; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <4 x double>, ptr %a0 + %v1 = load <4 x double>, ptr %a1 + %v2 = load <4 x double>, ptr %a2 + %negv0 = fneg contract nsz<4 x double> %v0 + %negv2 = fneg contract nsz<4 x double> %v2 + %mul = fmul contract nsz<4 x double> %negv0, %v1 + %add = fadd contract nsz<4 x double> %mul, %negv2 + store <4 x double> %add, ptr %res + ret void +} + +;; Check that xvfnmadd.d is not emitted. 
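+;; 'contract' only licenses fusing the mul and add; folding the two fnegs +;; into xvfnmadd.d would rewrite -(a*b) - c as -(a*b + c), which differs in +;; the sign of zero (a*b == +0.0, c == -0.0 gives +0.0 vs -0.0). Lacking +;; 'nsz', an xvbitrevi/xvfmsub sequence is emitted instead.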
+define void @not_contract_xvfnmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: not_contract_xvfnmadd_d: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-FAST-NEXT: xvbitrevi.d $xr2, $xr2, 63 +; CONTRACT-FAST-NEXT: xvfmsub.d $xr0, $xr2, $xr1, $xr0 +; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: not_contract_xvfnmadd_d: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-ON-NEXT: xvbitrevi.d $xr2, $xr2, 63 +; CONTRACT-ON-NEXT: xvfmsub.d $xr0, $xr2, $xr1, $xr0 +; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: not_contract_xvfnmadd_d: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-OFF-NEXT: xvbitrevi.d $xr2, $xr2, 63 +; CONTRACT-OFF-NEXT: xvfmsub.d $xr0, $xr2, $xr1, $xr0 +; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <4 x double>, ptr %a0 + %v1 = load <4 x double>, ptr %a1 + %v2 = load <4 x double>, ptr %a2 + %negv0 = fneg contract <4 x double> %v0 + %negv2 = fneg contract <4 x double> %v2 + %mul = fmul contract <4 x double> %negv0, %v1 + %add = fadd contract <4 x double> %mul, %negv2 + store <4 x double> %add, ptr %res + ret void +} + +define void @contract_xvfnmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: contract_xvfnmsub_d: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-FAST-NEXT: xvfnmsub.d $xr0, $xr2, $xr1, $xr0 +; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: contract_xvfnmsub_d: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-ON-NEXT: xvfnmsub.d $xr0, $xr2, $xr1, $xr0 +; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: contract_xvfnmsub_d: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-OFF-NEXT: xvfnmsub.d $xr0, $xr2, $xr1, $xr0 +; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <4 x double>, ptr %a0 + %v1 = load <4 x double>, ptr %a1 + %v2 = load <4 x double>, ptr %a2 + %negv2 = fneg contract <4 x double> %v2 + %mul = fmul contract <4 x double> %v0, %v1 + %add = fadd contract <4 x double> %mul, %negv2 + %neg = fneg contract <4 x double> %add + store <4 x double> %neg, ptr %res + ret void +} + +define void @contract_xvfnmsub_d_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: contract_xvfnmsub_d_nsz: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-FAST-NEXT: xvfnmsub.d $xr0, $xr2, $xr1, $xr0 +; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: contract_xvfnmsub_d_nsz: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-ON-NEXT: 
xvld $xr2, $a1, 0 +; CONTRACT-ON-NEXT: xvfnmsub.d $xr0, $xr2, $xr1, $xr0 +; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: contract_xvfnmsub_d_nsz: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-OFF-NEXT: xvfnmsub.d $xr0, $xr2, $xr1, $xr0 +; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <4 x double>, ptr %a0 + %v1 = load <4 x double>, ptr %a1 + %v2 = load <4 x double>, ptr %a2 + %negv0 = fneg contract nsz<4 x double> %v0 + %mul = fmul contract nsz<4 x double> %negv0, %v1 + %add = fadd contract nsz<4 x double> %mul, %v2 + store <4 x double> %add, ptr %res + ret void +} + +;; Check that xvfnmsub.d is not emitted. +define void @not_contract_xvfnmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: not_contract_xvfnmsub_d: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-FAST-NEXT: xvbitrevi.d $xr2, $xr2, 63 +; CONTRACT-FAST-NEXT: xvfmadd.d $xr0, $xr2, $xr1, $xr0 +; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: not_contract_xvfnmsub_d: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-ON-NEXT: xvbitrevi.d $xr2, $xr2, 63 +; CONTRACT-ON-NEXT: xvfmadd.d $xr0, $xr2, $xr1, $xr0 +; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: not_contract_xvfnmsub_d: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-OFF-NEXT: xvbitrevi.d $xr2, $xr2, 63 +; CONTRACT-OFF-NEXT: xvfmadd.d $xr0, $xr2, $xr1, $xr0 +; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <4 x double>, ptr %a0 + %v1 = load <4 x double>, ptr %a1 + %v2 = load <4 x double>, ptr %a2 + %negv0 = fneg contract <4 x double> %v0 + %mul = fmul contract <4 x double> %negv0, %v1 + %add = fadd contract <4 x double> %mul, %v2 + store <4 x double> %add, ptr %res + ret void +} + +define void @xvfmadd_d_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: xvfmadd_d_contract: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-FAST-NEXT: xvfmadd.d $xr0, $xr2, $xr1, $xr0 +; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: xvfmadd_d_contract: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-ON-NEXT: xvfmadd.d $xr0, $xr2, $xr1, $xr0 +; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: xvfmadd_d_contract: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-OFF-NEXT: xvfmadd.d $xr0, $xr2, $xr1, $xr0 +; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <4 x double>, ptr %a0 + %v1 = load <4 x double>, ptr %a1 + %v2 = load <4 x double>, ptr %a2 + %mul = fmul contract <4 x double> %v0, %v1 + %add = fadd contract 
<4 x double> %mul, %v2 + store <4 x double> %add, ptr %res + ret void +} + +define void @xvfmsub_d_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: xvfmsub_d_contract: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-FAST-NEXT: xvfmsub.d $xr0, $xr2, $xr1, $xr0 +; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: xvfmsub_d_contract: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-ON-NEXT: xvfmsub.d $xr0, $xr2, $xr1, $xr0 +; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: xvfmsub_d_contract: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-OFF-NEXT: xvfmsub.d $xr0, $xr2, $xr1, $xr0 +; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <4 x double>, ptr %a0 + %v1 = load <4 x double>, ptr %a1 + %v2 = load <4 x double>, ptr %a2 + %mul = fmul contract <4 x double> %v0, %v1 + %sub = fsub contract <4 x double> %mul, %v2 + store <4 x double> %sub, ptr %res + ret void +} + +define void @xvfnmadd_d_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: xvfnmadd_d_contract: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-FAST-NEXT: xvfnmadd.d $xr0, $xr2, $xr1, $xr0 +; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: xvfnmadd_d_contract: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-ON-NEXT: xvfnmadd.d $xr0, $xr2, $xr1, $xr0 +; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: xvfnmadd_d_contract: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-OFF-NEXT: xvfnmadd.d $xr0, $xr2, $xr1, $xr0 +; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <4 x double>, ptr %a0 + %v1 = load <4 x double>, ptr %a1 + %v2 = load <4 x double>, ptr %a2 + %mul = fmul contract <4 x double> %v0, %v1 + %add = fadd contract <4 x double> %mul, %v2 + %negadd = fneg contract <4 x double> %add + store <4 x double> %negadd, ptr %res + ret void +} + +define void @xvfnmsub_d_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: xvfnmsub_d_contract: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-FAST-NEXT: xvfnmsub.d $xr0, $xr2, $xr1, $xr0 +; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: xvfnmsub_d_contract: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-ON-NEXT: xvfnmsub.d $xr0, $xr2, $xr1, $xr0 +; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: xvfnmsub_d_contract: +; CONTRACT-OFF: # %bb.0: # %entry +; 
CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-OFF-NEXT: xvfnmsub.d $xr0, $xr2, $xr1, $xr0 +; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <4 x double>, ptr %a0 + %v1 = load <4 x double>, ptr %a1 + %v2 = load <4 x double>, ptr %a2 + %mul = fmul contract <4 x double> %v0, %v1 + %negv2 = fneg contract <4 x double> %v2 + %add = fadd contract <4 x double> %negv2, %mul + %negadd = fneg contract <4 x double> %add + store <4 x double> %negadd, ptr %res + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/fma-v8f32.ll b/llvm/test/CodeGen/LoongArch/lasx/fma-v8f32.ll new file mode 100644 index 0000000000000000000000000000000000000000..b7b3cb3a2e665ba654d6ee4bdd73df176f30f9cb --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/fma-v8f32.ll @@ -0,0 +1,804 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx --fp-contract=fast < %s \ +; RUN: | FileCheck %s --check-prefix=CONTRACT-FAST +; RUN: llc --mtriple=loongarch64 --mattr=+lasx --fp-contract=on < %s \ +; RUN: | FileCheck %s --check-prefix=CONTRACT-ON +; RUN: llc --mtriple=loongarch64 --mattr=+lasx --fp-contract=off < %s \ +; RUN: | FileCheck %s --check-prefix=CONTRACT-OFF + +define void @xvfmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: xvfmadd_s: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-FAST-NEXT: xvfmadd.s $xr0, $xr2, $xr1, $xr0 +; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: xvfmadd_s: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: xvld $xr0, $a2, 0 +; CONTRACT-ON-NEXT: xvld $xr1, $a1, 0 +; CONTRACT-ON-NEXT: xvfmul.s $xr0, $xr1, $xr0 +; CONTRACT-ON-NEXT: xvld $xr1, $a3, 0 +; CONTRACT-ON-NEXT: xvfadd.s $xr0, $xr0, $xr1 +; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: xvfmadd_s: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: xvld $xr0, $a2, 0 +; CONTRACT-OFF-NEXT: xvld $xr1, $a1, 0 +; CONTRACT-OFF-NEXT: xvfmul.s $xr0, $xr1, $xr0 +; CONTRACT-OFF-NEXT: xvld $xr1, $a3, 0 +; CONTRACT-OFF-NEXT: xvfadd.s $xr0, $xr0, $xr1 +; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <8 x float>, ptr %a0 + %v1 = load <8 x float>, ptr %a1 + %v2 = load <8 x float>, ptr %a2 + %mul = fmul<8 x float> %v0, %v1 + %add = fadd<8 x float> %mul, %v2 + store <8 x float> %add, ptr %res + ret void +} + +define void @xvfmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: xvfmsub_s: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-FAST-NEXT: xvfmsub.s $xr0, $xr2, $xr1, $xr0 +; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: xvfmsub_s: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: xvld $xr0, $a2, 0 +; CONTRACT-ON-NEXT: xvld $xr1, $a1, 0 +; CONTRACT-ON-NEXT: xvfmul.s $xr0, $xr1, $xr0 +; CONTRACT-ON-NEXT: xvld $xr1, $a3, 0 +; CONTRACT-ON-NEXT: xvfsub.s $xr0, $xr0, $xr1 +; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: xvfmsub_s: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: xvld $xr0, $a2, 0 +; CONTRACT-OFF-NEXT: xvld $xr1, $a1, 0 +; 
CONTRACT-OFF-NEXT: xvfmul.s $xr0, $xr1, $xr0 +; CONTRACT-OFF-NEXT: xvld $xr1, $a3, 0 +; CONTRACT-OFF-NEXT: xvfsub.s $xr0, $xr0, $xr1 +; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <8 x float>, ptr %a0 + %v1 = load <8 x float>, ptr %a1 + %v2 = load <8 x float>, ptr %a2 + %mul = fmul<8 x float> %v0, %v1 + %sub = fsub<8 x float> %mul, %v2 + store <8 x float> %sub, ptr %res + ret void +} + +define void @xvfnmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: xvfnmadd_s: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-FAST-NEXT: xvfnmadd.s $xr0, $xr2, $xr1, $xr0 +; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: xvfnmadd_s: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: xvld $xr0, $a2, 0 +; CONTRACT-ON-NEXT: xvld $xr1, $a1, 0 +; CONTRACT-ON-NEXT: xvfmul.s $xr0, $xr1, $xr0 +; CONTRACT-ON-NEXT: xvld $xr1, $a3, 0 +; CONTRACT-ON-NEXT: xvfadd.s $xr0, $xr0, $xr1 +; CONTRACT-ON-NEXT: xvbitrevi.w $xr0, $xr0, 31 +; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: xvfnmadd_s: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: xvld $xr0, $a2, 0 +; CONTRACT-OFF-NEXT: xvld $xr1, $a1, 0 +; CONTRACT-OFF-NEXT: xvfmul.s $xr0, $xr1, $xr0 +; CONTRACT-OFF-NEXT: xvld $xr1, $a3, 0 +; CONTRACT-OFF-NEXT: xvfadd.s $xr0, $xr0, $xr1 +; CONTRACT-OFF-NEXT: xvbitrevi.w $xr0, $xr0, 31 +; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <8 x float>, ptr %a0 + %v1 = load <8 x float>, ptr %a1 + %v2 = load <8 x float>, ptr %a2 + %mul = fmul<8 x float> %v0, %v1 + %add = fadd<8 x float> %mul, %v2 + %negadd = fneg<8 x float> %add + store <8 x float> %negadd, ptr %res + ret void +} + +define void @xvfnmadd_s_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: xvfnmadd_s_nsz: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-FAST-NEXT: xvfnmadd.s $xr0, $xr2, $xr1, $xr0 +; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: xvfnmadd_s_nsz: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: xvld $xr0, $a2, 0 +; CONTRACT-ON-NEXT: xvld $xr1, $a1, 0 +; CONTRACT-ON-NEXT: xvbitrevi.w $xr1, $xr1, 31 +; CONTRACT-ON-NEXT: xvfmul.s $xr0, $xr1, $xr0 +; CONTRACT-ON-NEXT: xvld $xr1, $a3, 0 +; CONTRACT-ON-NEXT: xvfsub.s $xr0, $xr0, $xr1 +; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: xvfnmadd_s_nsz: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: xvld $xr0, $a2, 0 +; CONTRACT-OFF-NEXT: xvld $xr1, $a1, 0 +; CONTRACT-OFF-NEXT: xvbitrevi.w $xr1, $xr1, 31 +; CONTRACT-OFF-NEXT: xvfmul.s $xr0, $xr1, $xr0 +; CONTRACT-OFF-NEXT: xvld $xr1, $a3, 0 +; CONTRACT-OFF-NEXT: xvfsub.s $xr0, $xr0, $xr1 +; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <8 x float>, ptr %a0 + %v1 = load <8 x float>, ptr %a1 + %v2 = load <8 x float>, ptr %a2 + %negv0 = fneg nsz<8 x float> %v0 + %negv2 = fneg nsz<8 x float> %v2 + %mul = fmul nsz<8 x float> %negv0, %v1 + %add = fadd nsz<8 x float> %mul, %negv2 + store <8 x float> %add, ptr %res + ret void +} + +;; Check that fnmadd.s is not emitted. 
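+;; Same signed-zero hazard as the v4f64 case: (-a)*b + (-c) matches +;; -(a*b + c) only under 'nsz', so this plain fneg/fmul/fadd chain must not +;; become xvfnmadd.s.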
+define void @not_xvfnmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: not_xvfnmadd_s: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-FAST-NEXT: xvbitrevi.w $xr2, $xr2, 31 +; CONTRACT-FAST-NEXT: xvfmsub.s $xr0, $xr2, $xr1, $xr0 +; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: not_xvfnmadd_s: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: xvld $xr0, $a2, 0 +; CONTRACT-ON-NEXT: xvld $xr1, $a1, 0 +; CONTRACT-ON-NEXT: xvbitrevi.w $xr1, $xr1, 31 +; CONTRACT-ON-NEXT: xvfmul.s $xr0, $xr1, $xr0 +; CONTRACT-ON-NEXT: xvld $xr1, $a3, 0 +; CONTRACT-ON-NEXT: xvfsub.s $xr0, $xr0, $xr1 +; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: not_xvfnmadd_s: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: xvld $xr0, $a2, 0 +; CONTRACT-OFF-NEXT: xvld $xr1, $a1, 0 +; CONTRACT-OFF-NEXT: xvbitrevi.w $xr1, $xr1, 31 +; CONTRACT-OFF-NEXT: xvfmul.s $xr0, $xr1, $xr0 +; CONTRACT-OFF-NEXT: xvld $xr1, $a3, 0 +; CONTRACT-OFF-NEXT: xvfsub.s $xr0, $xr0, $xr1 +; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <8 x float>, ptr %a0 + %v1 = load <8 x float>, ptr %a1 + %v2 = load <8 x float>, ptr %a2 + %negv0 = fneg<8 x float> %v0 + %negv2 = fneg<8 x float> %v2 + %mul = fmul<8 x float> %negv0, %v1 + %add = fadd<8 x float> %mul, %negv2 + store <8 x float> %add, ptr %res + ret void +} + +define void @xvfnmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: xvfnmsub_s: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-FAST-NEXT: xvfnmsub.s $xr0, $xr2, $xr1, $xr0 +; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: xvfnmsub_s: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: xvld $xr0, $a2, 0 +; CONTRACT-ON-NEXT: xvld $xr1, $a1, 0 +; CONTRACT-ON-NEXT: xvfmul.s $xr0, $xr1, $xr0 +; CONTRACT-ON-NEXT: xvld $xr1, $a3, 0 +; CONTRACT-ON-NEXT: xvfsub.s $xr0, $xr0, $xr1 +; CONTRACT-ON-NEXT: xvbitrevi.w $xr0, $xr0, 31 +; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: xvfnmsub_s: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: xvld $xr0, $a2, 0 +; CONTRACT-OFF-NEXT: xvld $xr1, $a1, 0 +; CONTRACT-OFF-NEXT: xvfmul.s $xr0, $xr1, $xr0 +; CONTRACT-OFF-NEXT: xvld $xr1, $a3, 0 +; CONTRACT-OFF-NEXT: xvfsub.s $xr0, $xr0, $xr1 +; CONTRACT-OFF-NEXT: xvbitrevi.w $xr0, $xr0, 31 +; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <8 x float>, ptr %a0 + %v1 = load <8 x float>, ptr %a1 + %v2 = load <8 x float>, ptr %a2 + %negv2 = fneg<8 x float> %v2 + %mul = fmul<8 x float> %v0, %v1 + %add = fadd<8 x float> %mul, %negv2 + %neg = fneg<8 x float> %add + store <8 x float> %neg, ptr %res + ret void +} + +define void @xvfnmsub_s_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: xvfnmsub_s_nsz: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-FAST-NEXT: xvfnmsub.s $xr0, $xr2, $xr1, $xr0 +; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: xvfnmsub_s_nsz: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: xvld $xr0, $a2, 0 
+; CONTRACT-ON-NEXT: xvld $xr1, $a1, 0 +; CONTRACT-ON-NEXT: xvfmul.s $xr0, $xr1, $xr0 +; CONTRACT-ON-NEXT: xvld $xr1, $a3, 0 +; CONTRACT-ON-NEXT: xvfsub.s $xr0, $xr1, $xr0 +; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: xvfnmsub_s_nsz: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: xvld $xr0, $a2, 0 +; CONTRACT-OFF-NEXT: xvld $xr1, $a1, 0 +; CONTRACT-OFF-NEXT: xvfmul.s $xr0, $xr1, $xr0 +; CONTRACT-OFF-NEXT: xvld $xr1, $a3, 0 +; CONTRACT-OFF-NEXT: xvfsub.s $xr0, $xr1, $xr0 +; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <8 x float>, ptr %a0 + %v1 = load <8 x float>, ptr %a1 + %v2 = load <8 x float>, ptr %a2 + %negv0 = fneg nsz<8 x float> %v0 + %mul = fmul nsz<8 x float> %negv0, %v1 + %add = fadd nsz<8 x float> %mul, %v2 + store <8 x float> %add, ptr %res + ret void +} + +;; Check that fnmsub.s is not emitted. +define void @not_xvfnmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: not_xvfnmsub_s: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-FAST-NEXT: xvbitrevi.w $xr2, $xr2, 31 +; CONTRACT-FAST-NEXT: xvfmadd.s $xr0, $xr2, $xr1, $xr0 +; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: not_xvfnmsub_s: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: xvld $xr0, $a2, 0 +; CONTRACT-ON-NEXT: xvld $xr1, $a1, 0 +; CONTRACT-ON-NEXT: xvfmul.s $xr0, $xr1, $xr0 +; CONTRACT-ON-NEXT: xvld $xr1, $a3, 0 +; CONTRACT-ON-NEXT: xvfsub.s $xr0, $xr1, $xr0 +; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: not_xvfnmsub_s: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: xvld $xr0, $a2, 0 +; CONTRACT-OFF-NEXT: xvld $xr1, $a1, 0 +; CONTRACT-OFF-NEXT: xvfmul.s $xr0, $xr1, $xr0 +; CONTRACT-OFF-NEXT: xvld $xr1, $a3, 0 +; CONTRACT-OFF-NEXT: xvfsub.s $xr0, $xr1, $xr0 +; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <8 x float>, ptr %a0 + %v1 = load <8 x float>, ptr %a1 + %v2 = load <8 x float>, ptr %a2 + %negv0 = fneg<8 x float> %v0 + %mul = fmul<8 x float> %negv0, %v1 + %add = fadd<8 x float> %mul, %v2 + store <8 x float> %add, ptr %res + ret void +} + +define void @contract_xvfmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: contract_xvfmadd_s: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-FAST-NEXT: xvfmadd.s $xr0, $xr2, $xr1, $xr0 +; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: contract_xvfmadd_s: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-ON-NEXT: xvfmadd.s $xr0, $xr2, $xr1, $xr0 +; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: contract_xvfmadd_s: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-OFF-NEXT: xvfmadd.s $xr0, $xr2, $xr1, $xr0 +; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <8 x float>, ptr %a0 + %v1 = load <8 x float>, ptr %a1 + %v2 = load <8 x float>, ptr %a2 + %mul = fmul contract <8 x float> %v0, %v1 + %add = fadd contract 
<8 x float> %mul, %v2 + store <8 x float> %add, ptr %res + ret void +} + +define void @contract_xvfmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: contract_xvfmsub_s: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-FAST-NEXT: xvfmsub.s $xr0, $xr2, $xr1, $xr0 +; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: contract_xvfmsub_s: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-ON-NEXT: xvfmsub.s $xr0, $xr2, $xr1, $xr0 +; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: contract_xvfmsub_s: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-OFF-NEXT: xvfmsub.s $xr0, $xr2, $xr1, $xr0 +; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <8 x float>, ptr %a0 + %v1 = load <8 x float>, ptr %a1 + %v2 = load <8 x float>, ptr %a2 + %mul = fmul contract <8 x float> %v0, %v1 + %sub = fsub contract <8 x float> %mul, %v2 + store <8 x float> %sub, ptr %res + ret void +} + +define void @contract_xvfnmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: contract_xvfnmadd_s: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-FAST-NEXT: xvfnmadd.s $xr0, $xr2, $xr1, $xr0 +; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: contract_xvfnmadd_s: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-ON-NEXT: xvfnmadd.s $xr0, $xr2, $xr1, $xr0 +; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: contract_xvfnmadd_s: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-OFF-NEXT: xvfnmadd.s $xr0, $xr2, $xr1, $xr0 +; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <8 x float>, ptr %a0 + %v1 = load <8 x float>, ptr %a1 + %v2 = load <8 x float>, ptr %a2 + %mul = fmul contract <8 x float> %v0, %v1 + %add = fadd contract <8 x float> %mul, %v2 + %negadd = fneg contract <8 x float> %add + store <8 x float> %negadd, ptr %res + ret void +} + +define void @contract_xvfnmadd_s_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: contract_xvfnmadd_s_nsz: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-FAST-NEXT: xvfnmadd.s $xr0, $xr2, $xr1, $xr0 +; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: contract_xvfnmadd_s_nsz: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-ON-NEXT: xvfnmadd.s $xr0, $xr2, $xr1, $xr0 +; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: contract_xvfnmadd_s_nsz: +; CONTRACT-OFF: # %bb.0: # %entry +; 
CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-OFF-NEXT: xvfnmadd.s $xr0, $xr2, $xr1, $xr0 +; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <8 x float>, ptr %a0 + %v1 = load <8 x float>, ptr %a1 + %v2 = load <8 x float>, ptr %a2 + %negv0 = fneg contract nsz<8 x float> %v0 + %negv2 = fneg contract nsz<8 x float> %v2 + %mul = fmul contract nsz<8 x float> %negv0, %v1 + %add = fadd contract nsz<8 x float> %mul, %negv2 + store <8 x float> %add, ptr %res + ret void +} + +;; Check that fnmadd.s is not emitted. +define void @not_contract_xvfnmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: not_contract_xvfnmadd_s: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-FAST-NEXT: xvbitrevi.w $xr2, $xr2, 31 +; CONTRACT-FAST-NEXT: xvfmsub.s $xr0, $xr2, $xr1, $xr0 +; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: not_contract_xvfnmadd_s: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-ON-NEXT: xvbitrevi.w $xr2, $xr2, 31 +; CONTRACT-ON-NEXT: xvfmsub.s $xr0, $xr2, $xr1, $xr0 +; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: not_contract_xvfnmadd_s: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-OFF-NEXT: xvbitrevi.w $xr2, $xr2, 31 +; CONTRACT-OFF-NEXT: xvfmsub.s $xr0, $xr2, $xr1, $xr0 +; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <8 x float>, ptr %a0 + %v1 = load <8 x float>, ptr %a1 + %v2 = load <8 x float>, ptr %a2 + %negv0 = fneg contract <8 x float> %v0 + %negv2 = fneg contract <8 x float> %v2 + %mul = fmul contract <8 x float> %negv0, %v1 + %add = fadd contract <8 x float> %mul, %negv2 + store <8 x float> %add, ptr %res + ret void +} + +define void @contract_xvfnmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: contract_xvfnmsub_s: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-FAST-NEXT: xvfnmsub.s $xr0, $xr2, $xr1, $xr0 +; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: contract_xvfnmsub_s: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-ON-NEXT: xvfnmsub.s $xr0, $xr2, $xr1, $xr0 +; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: contract_xvfnmsub_s: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-OFF-NEXT: xvfnmsub.s $xr0, $xr2, $xr1, $xr0 +; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <8 x float>, ptr %a0 + %v1 = load <8 x float>, ptr %a1 + %v2 = load <8 x float>, ptr %a2 + %negv2 = fneg contract <8 x float> %v2 + %mul = fmul contract <8 x float> %v0, %v1 + %add = fadd contract <8 x float> %mul, %negv2 + %neg = fneg contract <8 x float> %add + store <8 x float> %neg, ptr %res + ret 
void +} + +define void @contract_xvfnmsub_s_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: contract_xvfnmsub_s_nsz: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-FAST-NEXT: xvfnmsub.s $xr0, $xr2, $xr1, $xr0 +; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: contract_xvfnmsub_s_nsz: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-ON-NEXT: xvfnmsub.s $xr0, $xr2, $xr1, $xr0 +; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: contract_xvfnmsub_s_nsz: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-OFF-NEXT: xvfnmsub.s $xr0, $xr2, $xr1, $xr0 +; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <8 x float>, ptr %a0 + %v1 = load <8 x float>, ptr %a1 + %v2 = load <8 x float>, ptr %a2 + %negv0 = fneg contract nsz<8 x float> %v0 + %mul = fmul contract nsz<8 x float> %negv0, %v1 + %add = fadd contract nsz<8 x float> %mul, %v2 + store <8 x float> %add, ptr %res + ret void +} + +;; Check that fnmsub.s is not emitted. +define void @not_contract_xvfnmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: not_contract_xvfnmsub_s: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-FAST-NEXT: xvbitrevi.w $xr2, $xr2, 31 +; CONTRACT-FAST-NEXT: xvfmadd.s $xr0, $xr2, $xr1, $xr0 +; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: not_contract_xvfnmsub_s: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-ON-NEXT: xvbitrevi.w $xr2, $xr2, 31 +; CONTRACT-ON-NEXT: xvfmadd.s $xr0, $xr2, $xr1, $xr0 +; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: not_contract_xvfnmsub_s: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-OFF-NEXT: xvbitrevi.w $xr2, $xr2, 31 +; CONTRACT-OFF-NEXT: xvfmadd.s $xr0, $xr2, $xr1, $xr0 +; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <8 x float>, ptr %a0 + %v1 = load <8 x float>, ptr %a1 + %v2 = load <8 x float>, ptr %a2 + %negv0 = fneg contract <8 x float> %v0 + %mul = fmul contract <8 x float> %negv0, %v1 + %add = fadd contract <8 x float> %mul, %v2 + store <8 x float> %add, ptr %res + ret void +} + +define void @xvfmadd_s_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: xvfmadd_s_contract: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-FAST-NEXT: xvfmadd.s $xr0, $xr2, $xr1, $xr0 +; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: xvfmadd_s_contract: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-ON-NEXT: 
xvfmadd.s $xr0, $xr2, $xr1, $xr0 +; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: xvfmadd_s_contract: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-OFF-NEXT: xvfmadd.s $xr0, $xr2, $xr1, $xr0 +; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <8 x float>, ptr %a0 + %v1 = load <8 x float>, ptr %a1 + %v2 = load <8 x float>, ptr %a2 + %mul = fmul contract <8 x float> %v0, %v1 + %add = fadd contract <8 x float> %mul, %v2 + store <8 x float> %add, ptr %res + ret void +} + +define void @xvfmsub_s_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: xvfmsub_s_contract: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-FAST-NEXT: xvfmsub.s $xr0, $xr2, $xr1, $xr0 +; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: xvfmsub_s_contract: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-ON-NEXT: xvfmsub.s $xr0, $xr2, $xr1, $xr0 +; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: xvfmsub_s_contract: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-OFF-NEXT: xvfmsub.s $xr0, $xr2, $xr1, $xr0 +; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <8 x float>, ptr %a0 + %v1 = load <8 x float>, ptr %a1 + %v2 = load <8 x float>, ptr %a2 + %mul = fmul contract <8 x float> %v0, %v1 + %sub = fsub contract <8 x float> %mul, %v2 + store <8 x float> %sub, ptr %res + ret void +} + +define void @xvfnmadd_s_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: xvfnmadd_s_contract: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-FAST-NEXT: xvfnmadd.s $xr0, $xr2, $xr1, $xr0 +; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: xvfnmadd_s_contract: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-ON-NEXT: xvfnmadd.s $xr0, $xr2, $xr1, $xr0 +; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: xvfnmadd_s_contract: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-OFF-NEXT: xvfnmadd.s $xr0, $xr2, $xr1, $xr0 +; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <8 x float>, ptr %a0 + %v1 = load <8 x float>, ptr %a1 + %v2 = load <8 x float>, ptr %a2 + %mul = fmul contract <8 x float> %v0, %v1 + %add = fadd contract <8 x float> %mul, %v2 + %negadd = fneg contract <8 x float> %add + store <8 x float> %negadd, ptr %res + ret void +} + +define void @xvfnmsub_s_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: xvfnmsub_s_contract: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-FAST-NEXT: xvld $xr1, $a2, 0 
+; CONTRACT-FAST-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-FAST-NEXT: xvfnmsub.s $xr0, $xr2, $xr1, $xr0 +; CONTRACT-FAST-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: xvfnmsub_s_contract: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-ON-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-ON-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-ON-NEXT: xvfnmsub.s $xr0, $xr2, $xr1, $xr0 +; CONTRACT-ON-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: xvfnmsub_s_contract: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: xvld $xr0, $a3, 0 +; CONTRACT-OFF-NEXT: xvld $xr1, $a2, 0 +; CONTRACT-OFF-NEXT: xvld $xr2, $a1, 0 +; CONTRACT-OFF-NEXT: xvfnmsub.s $xr0, $xr2, $xr1, $xr0 +; CONTRACT-OFF-NEXT: xvst $xr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <8 x float>, ptr %a0 + %v1 = load <8 x float>, ptr %a1 + %v2 = load <8 x float>, ptr %a2 + %mul = fmul contract <8 x float> %v0, %v1 + %negv2 = fneg contract <8 x float> %v2 + %add = fadd contract <8 x float> %negv2, %mul + %negadd = fneg contract <8 x float> %add + store <8 x float> %negadd, ptr %res + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/fsqrt.ll b/llvm/test/CodeGen/LoongArch/lasx/fsqrt.ll new file mode 100644 index 0000000000000000000000000000000000000000..c4a881bdeae9f1b81c756971141c75fb157b1d38 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/fsqrt.ll @@ -0,0 +1,65 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +;; fsqrt +define void @sqrt_v8f32(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: sqrt_v8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvfsqrt.s $xr0, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <8 x float>, ptr %a0, align 16 + %sqrt = call <8 x float> @llvm.sqrt.v8f32 (<8 x float> %v0) + store <8 x float> %sqrt, ptr %res, align 16 + ret void +} + +define void @sqrt_v4f64(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: sqrt_v4f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvfsqrt.d $xr0, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <4 x double>, ptr %a0, align 16 + %sqrt = call <4 x double> @llvm.sqrt.v4f64 (<4 x double> %v0) + store <4 x double> %sqrt, ptr %res, align 16 + ret void +} + +;; 1.0 / (fsqrt vec) +define void @one_div_sqrt_v8f32(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: one_div_sqrt_v8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvfrsqrt.s $xr0, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <8 x float>, ptr %a0, align 16 + %sqrt = call <8 x float> @llvm.sqrt.v8f32 (<8 x float> %v0) + %div = fdiv <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %sqrt + store <8 x float> %div, ptr %res, align 16 + ret void +} + +define void @one_div_sqrt_v4f64(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: one_div_sqrt_v4f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvfrsqrt.d $xr0, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <4 x double>, ptr %a0, align 16 + %sqrt = call <4 x double> @llvm.sqrt.v4f64 (<4 x double> %v0) + %div = fdiv <4 x double> <double 1.0, double 1.0, double 1.0, double 1.0>, %sqrt + store <4 x double> %div, ptr %res, align 16 + ret void +} + +declare <8 x float> @llvm.sqrt.v8f32(<8 x float>) +declare <4 x double> @llvm.sqrt.v4f64(<4 x double>) diff --git a/llvm/test/CodeGen/LoongArch/lasx/inline-asm-operand-modifier.ll
b/llvm/test/CodeGen/LoongArch/lasx/inline-asm-operand-modifier.ll new file mode 100644 index 0000000000000000000000000000000000000000..201e34c8b5ae0d403a7fe0b79923fcb710b1987d --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/inline-asm-operand-modifier.ll @@ -0,0 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +define void @test_u() nounwind { +; CHECK-LABEL: test_u: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: xvldi $xr0, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: ret +entry: + %0 = tail call <4 x i64> asm sideeffect "xvldi ${0:u}, 1", "=f"() + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/inline-asm-reg-names.ll b/llvm/test/CodeGen/LoongArch/lasx/inline-asm-reg-names.ll new file mode 100644 index 0000000000000000000000000000000000000000..dd400ecfcf91d7056a09a2c0f3e19237932ce419 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/inline-asm-reg-names.ll @@ -0,0 +1,58 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +define void @register_xr1() nounwind { +; CHECK-LABEL: register_xr1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: xvldi $xr1, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: ret +entry: + %0 = tail call <4 x i64> asm sideeffect "xvldi ${0:u}, 1", "={$xr1}"() + ret void +} + +define void @register_xr7() nounwind { +; CHECK-LABEL: register_xr7: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: xvldi $xr7, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: ret +entry: + %0 = tail call <4 x i64> asm sideeffect "xvldi ${0:u}, 1", "={$xr7}"() + ret void +} + +define void @register_xr23() nounwind { +; CHECK-LABEL: register_xr23: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: xvldi $xr23, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: ret +entry: + %0 = tail call <4 x i64> asm sideeffect "xvldi ${0:u}, 1", "={$xr23}"() + ret void +} + +;; The lower 64-bit of the vector register '$xr31' is overlapped with +;; the floating-point register '$f31' ('$fs7'). And '$f31' ('$fs7') +;; is a callee-saved register which is preserved across calls. +;; That's why the fst.d and fld.d instructions are emitted. 
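+;; Note that only this low 64-bit slice needs saving: per the LoongArch ELF +;; psABI the upper bits of the vector registers are not preserved across +;; calls, so an 8-byte fst.d/fld.d pair is sufficient.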
+define void @register_xr31() nounwind { +; CHECK-LABEL: register_xr31: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi.d $sp, $sp, -16 +; CHECK-NEXT: fst.d $fs7, $sp, 8 # 8-byte Folded Spill +; CHECK-NEXT: #APP +; CHECK-NEXT: xvldi $xr31, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: fld.d $fs7, $sp, 8 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 16 +; CHECK-NEXT: ret +entry: + %0 = tail call <4 x i64> asm sideeffect "xvldi ${0:u}, 1", "={$xr31}"() + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-absd.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-absd.ll new file mode 100644 index 0000000000000000000000000000000000000000..bf54f44357b03493163cebd7dd094117d47fa2eb --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-absd.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvabsd.b(<32 x i8>, <32 x i8>) + +define <32 x i8> @lasx_xvabsd_b(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvabsd_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvabsd.b $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvabsd.b(<32 x i8> %va, <32 x i8> %vb) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvabsd.h(<16 x i16>, <16 x i16>) + +define <16 x i16> @lasx_xvabsd_h(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvabsd_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvabsd.h $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvabsd.h(<16 x i16> %va, <16 x i16> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvabsd.w(<8 x i32>, <8 x i32>) + +define <8 x i32> @lasx_xvabsd_w(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvabsd_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvabsd.w $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvabsd.w(<8 x i32> %va, <8 x i32> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvabsd.d(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvabsd_d(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvabsd_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvabsd.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvabsd.d(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} + +declare <32 x i8> @llvm.loongarch.lasx.xvabsd.bu(<32 x i8>, <32 x i8>) + +define <32 x i8> @lasx_xvabsd_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvabsd_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvabsd.bu $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvabsd.bu(<32 x i8> %va, <32 x i8> %vb) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvabsd.hu(<16 x i16>, <16 x i16>) + +define <16 x i16> @lasx_xvabsd_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvabsd_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvabsd.hu $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvabsd.hu(<16 x i16> %va, <16 x i16> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvabsd.wu(<8 x i32>, <8 x i32>) + +define <8 x i32> @lasx_xvabsd_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvabsd_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvabsd.wu $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res 
= call <8 x i32> @llvm.loongarch.lasx.xvabsd.wu(<8 x i32> %va, <8 x i32> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvabsd.du(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvabsd_du(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvabsd_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvabsd.du $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvabsd.du(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-add.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-add.ll new file mode 100644 index 0000000000000000000000000000000000000000..0c2f2ace29fc9384e1b652bb690af9dbfa919680 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-add.ll @@ -0,0 +1,62 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvadd.b(<32 x i8>, <32 x i8>) + +define <32 x i8> @lasx_xvadd_b(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvadd_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvadd.b $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvadd.b(<32 x i8> %va, <32 x i8> %vb) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvadd.h(<16 x i16>, <16 x i16>) + +define <16 x i16> @lasx_xvadd_h(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvadd_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvadd.h $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvadd.h(<16 x i16> %va, <16 x i16> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvadd.w(<8 x i32>, <8 x i32>) + +define <8 x i32> @lasx_xvadd_w(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvadd_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvadd.w $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvadd.w(<8 x i32> %va, <8 x i32> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvadd.d(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvadd_d(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvadd_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvadd.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvadd.d(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvadd.q(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvadd_q(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvadd_q: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvadd.q $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvadd.q(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-adda.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-adda.ll new file mode 100644 index 0000000000000000000000000000000000000000..c1258d53e913ee81c7df474bada02bdeff3ec4f7 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-adda.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvadda.b(<32 x i8>, <32 x i8>) + +define <32 x i8> @lasx_xvadda_b(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvadda_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvadda.b $xr0, 
$xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvadda.b(<32 x i8> %va, <32 x i8> %vb) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvadda.h(<16 x i16>, <16 x i16>) + +define <16 x i16> @lasx_xvadda_h(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvadda_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvadda.h $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvadda.h(<16 x i16> %va, <16 x i16> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvadda.w(<8 x i32>, <8 x i32>) + +define <8 x i32> @lasx_xvadda_w(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvadda_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvadda.w $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvadda.w(<8 x i32> %va, <8 x i32> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvadda.d(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvadda_d(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvadda_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvadda.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvadda.d(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-addi-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-addi-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..4998847f091009b07edb0c11bdeca83749f0b524 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-addi-invalid-imm.ll @@ -0,0 +1,65 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvaddi.bu(<32 x i8>, i32) + +define <32 x i8> @lasx_xvaddi_bu_lo(<32 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvaddi.bu: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvaddi.bu(<32 x i8> %va, i32 -1) + ret <32 x i8> %res +} + +define <32 x i8> @lasx_xvaddi_bu_hi(<32 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvaddi.bu: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvaddi.bu(<32 x i8> %va, i32 32) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvaddi.hu(<16 x i16>, i32) + +define <16 x i16> @lasx_xvaddi_hu_lo(<16 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvaddi.hu: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvaddi.hu(<16 x i16> %va, i32 -1) + ret <16 x i16> %res +} + +define <16 x i16> @lasx_xvaddi_hu_hi(<16 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvaddi.hu: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvaddi.hu(<16 x i16> %va, i32 32) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvaddi.wu(<8 x i32>, i32) + +define <8 x i32> @lasx_xvaddi_wu_lo(<8 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvaddi.wu: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvaddi.wu(<8 x i32> %va, i32 -1) + ret <8 x i32> %res +} + +define <8 x i32> @lasx_xvaddi_wu_hi(<8 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvaddi.wu: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvaddi.wu(<8 x i32> %va, i32 32) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvaddi.du(<4 x i64>, i32) + +define <4 x i64> @lasx_xvaddi_du_lo(<4 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvaddi.du: 
argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvaddi.du(<4 x i64> %va, i32 -1) + ret <4 x i64> %res +} + +define <4 x i64> @lasx_xvaddi_du_hi(<4 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvaddi.du: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvaddi.du(<4 x i64> %va, i32 32) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-addi-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-addi-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..f25f0e61a28e17228403d2147bbf8ad79f56383d --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-addi-non-imm.ll @@ -0,0 +1,37 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvaddi.bu(<32 x i8>, i32) + +define <32 x i8> @lasx_xvaddi_bu(<32 x i8> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvaddi.bu(<32 x i8> %va, i32 %b) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvaddi.hu(<16 x i16>, i32) + +define <16 x i16> @lasx_xvaddi_hu(<16 x i16> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvaddi.hu(<16 x i16> %va, i32 %b) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvaddi.wu(<8 x i32>, i32) + +define <8 x i32> @lasx_xvaddi_wu(<8 x i32> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvaddi.wu(<8 x i32> %va, i32 %b) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvaddi.du(<4 x i64>, i32) + +define <4 x i64> @lasx_xvaddi_du(<4 x i64> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvaddi.du(<4 x i64> %va, i32 %b) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-addi.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-addi.ll new file mode 100644 index 0000000000000000000000000000000000000000..09b5d07a0151cd401e580ce313edc96e5e43fa84 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-addi.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvaddi.bu(<32 x i8>, i32) + +define <32 x i8> @lasx_xvaddi_bu(<32 x i8> %va) nounwind { +; CHECK-LABEL: lasx_xvaddi_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvaddi.bu $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvaddi.bu(<32 x i8> %va, i32 1) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvaddi.hu(<16 x i16>, i32) + +define <16 x i16> @lasx_xvaddi_hu(<16 x i16> %va) nounwind { +; CHECK-LABEL: lasx_xvaddi_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvaddi.hu $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvaddi.hu(<16 x i16> %va, i32 1) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvaddi.wu(<8 x i32>, i32) + +define <8 x i32> @lasx_xvaddi_wu(<8 x i32> %va) nounwind { +; CHECK-LABEL: lasx_xvaddi_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvaddi.wu $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvaddi.wu(<8 x i32> %va, i32 1) + ret <8 x i32> %res +} + +declare <4 x 
i64> @llvm.loongarch.lasx.xvaddi.du(<4 x i64>, i32) + +define <4 x i64> @lasx_xvaddi_du(<4 x i64> %va) nounwind { +; CHECK-LABEL: lasx_xvaddi_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvaddi.du $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvaddi.du(<4 x i64> %va, i32 1) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-addw.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-addw.ll new file mode 100644 index 0000000000000000000000000000000000000000..ef7a1b5a50efb144d28083f20e71a8ec89952672 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-addw.ll @@ -0,0 +1,290 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.b(<32 x i8>, <32 x i8>) + +define <16 x i16> @lasx_xvaddwev_h_b(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvaddwev_h_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvaddwev.h.b $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.b(<32 x i8> %va, <32 x i8> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.h(<16 x i16>, <16 x i16>) + +define <8 x i32> @lasx_xvaddwev_w_h(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvaddwev_w_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvaddwev.w.h $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.h(<16 x i16> %va, <16 x i16> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.w(<8 x i32>, <8 x i32>) + +define <4 x i64> @lasx_xvaddwev_d_w(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvaddwev_d_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvaddwev.d.w $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.w(<8 x i32> %va, <8 x i32> %vb) + ret <4 x i64> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.d(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvaddwev_q_d(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvaddwev_q_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvaddwev.q.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.d(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu(<32 x i8>, <32 x i8>) + +define <16 x i16> @lasx_xvaddwev_h_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvaddwev_h_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvaddwev.h.bu $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu(<32 x i8> %va, <32 x i8> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu(<16 x i16>, <16 x i16>) + +define <8 x i32> @lasx_xvaddwev_w_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvaddwev_w_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvaddwev.w.hu $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu(<16 x i16> %va, <16 x i16> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu(<8 x i32>, <8 x i32>) + +define <4 x i64> @lasx_xvaddwev_d_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvaddwev_d_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvaddwev.d.wu $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: 
+ %res = call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu(<8 x i32> %va, <8 x i32> %vb) + ret <4 x i64> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvaddwev_q_du(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvaddwev_q_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvaddwev.q.du $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu.b(<32 x i8>, <32 x i8>) + +define <16 x i16> @lasx_xvaddwev_h_bu_b(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvaddwev_h_bu_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvaddwev.h.bu.b $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu.b(<32 x i8> %va, <32 x i8> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu.h(<16 x i16>, <16 x i16>) + +define <8 x i32> @lasx_xvaddwev_w_hu_h(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvaddwev_w_hu_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvaddwev.w.hu.h $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu.h(<16 x i16> %va, <16 x i16> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu.w(<8 x i32>, <8 x i32>) + +define <4 x i64> @lasx_xvaddwev_d_wu_w(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvaddwev_d_wu_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvaddwev.d.wu.w $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu.w(<8 x i32> %va, <8 x i32> %vb) + ret <4 x i64> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du.d(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvaddwev_q_du_d(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvaddwev_q_du_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvaddwev.q.du.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du.d(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.b(<32 x i8>, <32 x i8>) + +define <16 x i16> @lasx_xvaddwod_h_b(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvaddwod_h_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvaddwod.h.b $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.b(<32 x i8> %va, <32 x i8> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.h(<16 x i16>, <16 x i16>) + +define <8 x i32> @lasx_xvaddwod_w_h(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvaddwod_w_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvaddwod.w.h $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.h(<16 x i16> %va, <16 x i16> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.w(<8 x i32>, <8 x i32>) + +define <4 x i64> @lasx_xvaddwod_d_w(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvaddwod_d_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvaddwod.d.w $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.w(<8 x i32> %va, <8 x i32> %vb) + ret <4 x i64> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.d(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvaddwod_q_d(<4 x 
i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvaddwod_q_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvaddwod.q.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.d(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu(<32 x i8>, <32 x i8>) + +define <16 x i16> @lasx_xvaddwod_h_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvaddwod_h_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvaddwod.h.bu $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu(<32 x i8> %va, <32 x i8> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu(<16 x i16>, <16 x i16>) + +define <8 x i32> @lasx_xvaddwod_w_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvaddwod_w_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvaddwod.w.hu $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu(<16 x i16> %va, <16 x i16> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu(<8 x i32>, <8 x i32>) + +define <4 x i64> @lasx_xvaddwod_d_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvaddwod_d_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvaddwod.d.wu $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu(<8 x i32> %va, <8 x i32> %vb) + ret <4 x i64> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvaddwod_q_du(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvaddwod_q_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvaddwod.q.du $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu.b(<32 x i8>, <32 x i8>) + +define <16 x i16> @lasx_xvaddwod_h_bu_b(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvaddwod_h_bu_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvaddwod.h.bu.b $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu.b(<32 x i8> %va, <32 x i8> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu.h(<16 x i16>, <16 x i16>) + +define <8 x i32> @lasx_xvaddwod_w_hu_h(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvaddwod_w_hu_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvaddwod.w.hu.h $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu.h(<16 x i16> %va, <16 x i16> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu.w(<8 x i32>, <8 x i32>) + +define <4 x i64> @lasx_xvaddwod_d_wu_w(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvaddwod_d_wu_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvaddwod.d.wu.w $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu.w(<8 x i32> %va, <8 x i32> %vb) + ret <4 x i64> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du.d(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvaddwod_q_du_d(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvaddwod_q_du_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvaddwod.q.du.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> 
@llvm.loongarch.lasx.xvaddwod.q.du.d(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-and.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-and.ll new file mode 100644 index 0000000000000000000000000000000000000000..15f3a8094770b050404b8034e02b4a717dcd11f2 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-and.ll @@ -0,0 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvand.v(<32 x i8>, <32 x i8>) + +define <32 x i8> @lasx_xvand_v(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvand_v: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvand.v(<32 x i8> %va, <32 x i8> %vb) + ret <32 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-andi-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-andi-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..60f0b765f9546e160691bdc110a711be8b4a16ad --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-andi-invalid-imm.ll @@ -0,0 +1,17 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvandi.b(<32 x i8>, i32) + +define <32 x i8> @lasx_xvandi_b_lo(<32 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvandi.b: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvandi.b(<32 x i8> %va, i32 -1) + ret <32 x i8> %res +} + +define <32 x i8> @lasx_xvandi_b_hi(<32 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvandi.b: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvandi.b(<32 x i8> %va, i32 256) + ret <32 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-andi-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-andi-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..1273dc6b450b51726f7ded830acee768c09f5e2f --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-andi-non-imm.ll @@ -0,0 +1,10 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvandi.b(<32 x i8>, i32) + +define <32 x i8> @lasx_xvandi_b(<32 x i8> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvandi.b(<32 x i8> %va, i32 %b) + ret <32 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-andi.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-andi.ll new file mode 100644 index 0000000000000000000000000000000000000000..88cf142d696823758fbfeadf64f685aaf0745a1c --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-andi.ll @@ -0,0 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvandi.b(<32 x i8>, i32) + +define <32 x i8> @lasx_xvandi_b(<32 x i8> %va) nounwind { +; CHECK-LABEL: lasx_xvandi_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvandi.b $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvandi.b(<32 x i8> %va, i32 1) + ret <32 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-andn.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-andn.ll new file mode 
100644 index 0000000000000000000000000000000000000000..f385ef3661cb9dcac057afc1c1e33a964fd14d21 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-andn.ll @@ -0,0 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvandn.v(<32 x i8>, <32 x i8>) + +define <32 x i8> @lasx_xvandn_v(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvandn_v: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvandn.v $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvandn.v(<32 x i8> %va, <32 x i8> %vb) + ret <32 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-avg.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-avg.ll new file mode 100644 index 0000000000000000000000000000000000000000..488d3b96b00384b520d5c84207a7526acc234450 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-avg.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvavg.b(<32 x i8>, <32 x i8>) + +define <32 x i8> @lasx_xvavg_b(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvavg_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvavg.b $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvavg.b(<32 x i8> %va, <32 x i8> %vb) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvavg.h(<16 x i16>, <16 x i16>) + +define <16 x i16> @lasx_xvavg_h(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvavg_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvavg.h $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvavg.h(<16 x i16> %va, <16 x i16> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvavg.w(<8 x i32>, <8 x i32>) + +define <8 x i32> @lasx_xvavg_w(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvavg_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvavg.w $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvavg.w(<8 x i32> %va, <8 x i32> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvavg.d(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvavg_d(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvavg_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvavg.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvavg.d(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} + +declare <32 x i8> @llvm.loongarch.lasx.xvavg.bu(<32 x i8>, <32 x i8>) + +define <32 x i8> @lasx_xvavg_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvavg_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvavg.bu $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvavg.bu(<32 x i8> %va, <32 x i8> %vb) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvavg.hu(<16 x i16>, <16 x i16>) + +define <16 x i16> @lasx_xvavg_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvavg_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvavg.hu $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvavg.hu(<16 x i16> %va, <16 x i16> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvavg.wu(<8 x i32>, <8 x i32>) + 
+define <8 x i32> @lasx_xvavg_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvavg_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvavg.wu $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvavg.wu(<8 x i32> %va, <8 x i32> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvavg.du(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvavg_du(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvavg_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvavg.du $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvavg.du(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-avgr.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-avgr.ll new file mode 100644 index 0000000000000000000000000000000000000000..b5ab5a5366aafe5aa404c3fa3ba68eb94ba2ecd9 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-avgr.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvavgr.b(<32 x i8>, <32 x i8>) + +define <32 x i8> @lasx_xvavgr_b(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvavgr_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvavgr.b $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvavgr.b(<32 x i8> %va, <32 x i8> %vb) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvavgr.h(<16 x i16>, <16 x i16>) + +define <16 x i16> @lasx_xvavgr_h(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvavgr_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvavgr.h $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvavgr.h(<16 x i16> %va, <16 x i16> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvavgr.w(<8 x i32>, <8 x i32>) + +define <8 x i32> @lasx_xvavgr_w(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvavgr_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvavgr.w $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvavgr.w(<8 x i32> %va, <8 x i32> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvavgr.d(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvavgr_d(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvavgr_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvavgr.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvavgr.d(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} + +declare <32 x i8> @llvm.loongarch.lasx.xvavgr.bu(<32 x i8>, <32 x i8>) + +define <32 x i8> @lasx_xvavgr_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvavgr_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvavgr.bu $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvavgr.bu(<32 x i8> %va, <32 x i8> %vb) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvavgr.hu(<16 x i16>, <16 x i16>) + +define <16 x i16> @lasx_xvavgr_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvavgr_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvavgr.hu $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvavgr.hu(<16 x i16> %va, <16 x i16> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvavgr.wu(<8 x i32>, <8 
x i32>) + +define <8 x i32> @lasx_xvavgr_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvavgr_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvavgr.wu $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvavgr.wu(<8 x i32> %va, <8 x i32> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvavgr.du(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvavgr_du(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvavgr_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvavgr.du $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvavgr.du(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitclr-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitclr-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..ecc287e89bbc00910982f23989144682936018f6 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitclr-invalid-imm.ll @@ -0,0 +1,65 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvbitclri.b(<32 x i8>, i32) + +define <32 x i8> @lasx_xvbitclri_b_lo(<32 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvbitclri.b: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvbitclri.b(<32 x i8> %va, i32 -1) + ret <32 x i8> %res +} + +define <32 x i8> @lasx_xvbitclri_b_hi(<32 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvbitclri.b: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvbitclri.b(<32 x i8> %va, i32 8) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvbitclri.h(<16 x i16>, i32) + +define <16 x i16> @lasx_xvbitclri_h_lo(<16 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvbitclri.h: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvbitclri.h(<16 x i16> %va, i32 -1) + ret <16 x i16> %res +} + +define <16 x i16> @lasx_xvbitclri_h_hi(<16 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvbitclri.h: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvbitclri.h(<16 x i16> %va, i32 16) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvbitclri.w(<8 x i32>, i32) + +define <8 x i32> @lasx_xvbitclri_w_lo(<8 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvbitclri.w: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvbitclri.w(<8 x i32> %va, i32 -1) + ret <8 x i32> %res +} + +define <8 x i32> @lasx_xvbitclri_w_hi(<8 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvbitclri.w: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvbitclri.w(<8 x i32> %va, i32 32) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvbitclri.d(<4 x i64>, i32) + +define <4 x i64> @lasx_xvbitclri_d_lo(<4 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvbitclri.d: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvbitclri.d(<4 x i64> %va, i32 -1) + ret <4 x i64> %res +} + +define <4 x i64> @lasx_xvbitclri_d_hi(<4 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvbitclri.d: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvbitclri.d(<4 x i64> %va, i32 64) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitclr-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitclr-non-imm.ll new file mode 100644 index 
0000000000000000000000000000000000000000..09da85411082b67353443116ce8abfd2232595c9 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitclr-non-imm.ll @@ -0,0 +1,37 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvbitclri.b(<32 x i8>, i32) + +define <32 x i8> @lasx_xvbitclri_b(<32 x i8> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvbitclri.b(<32 x i8> %va, i32 %b) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvbitclri.h(<16 x i16>, i32) + +define <16 x i16> @lasx_xvbitclri_h(<16 x i16> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvbitclri.h(<16 x i16> %va, i32 %b) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvbitclri.w(<8 x i32>, i32) + +define <8 x i32> @lasx_xvbitclri_w(<8 x i32> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvbitclri.w(<8 x i32> %va, i32 %b) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvbitclri.d(<4 x i64>, i32) + +define <4 x i64> @lasx_xvbitclri_d(<4 x i64> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvbitclri.d(<4 x i64> %va, i32 %b) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitclr.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitclr.ll new file mode 100644 index 0000000000000000000000000000000000000000..cec71bab2fe84cdb394c09c0d7de6283c25d50db --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitclr.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvbitclr.b(<32 x i8>, <32 x i8>) + +define <32 x i8> @lasx_xvbitclr_b(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvbitclr_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvbitclr.b $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvbitclr.b(<32 x i8> %va, <32 x i8> %vb) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvbitclr.h(<16 x i16>, <16 x i16>) + +define <16 x i16> @lasx_xvbitclr_h(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvbitclr_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvbitclr.h $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvbitclr.h(<16 x i16> %va, <16 x i16> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvbitclr.w(<8 x i32>, <8 x i32>) + +define <8 x i32> @lasx_xvbitclr_w(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvbitclr_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvbitclr.w $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvbitclr.w(<8 x i32> %va, <8 x i32> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvbitclr.d(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvbitclr_d(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvbitclr_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvbitclr.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvbitclr.d(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} + 
+declare <32 x i8> @llvm.loongarch.lasx.xvbitclri.b(<32 x i8>, i32) + +define <32 x i8> @lasx_xvbitclri_b(<32 x i8> %va) nounwind { +; CHECK-LABEL: lasx_xvbitclri_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvbitclri.b $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvbitclri.b(<32 x i8> %va, i32 1) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvbitclri.h(<16 x i16>, i32) + +define <16 x i16> @lasx_xvbitclri_h(<16 x i16> %va) nounwind { +; CHECK-LABEL: lasx_xvbitclri_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvbitclri.h $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvbitclri.h(<16 x i16> %va, i32 1) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvbitclri.w(<8 x i32>, i32) + +define <8 x i32> @lasx_xvbitclri_w(<8 x i32> %va) nounwind { +; CHECK-LABEL: lasx_xvbitclri_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvbitclri.w $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvbitclri.w(<8 x i32> %va, i32 1) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvbitclri.d(<4 x i64>, i32) + +define <4 x i64> @lasx_xvbitclri_d(<4 x i64> %va) nounwind { +; CHECK-LABEL: lasx_xvbitclri_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvbitclri.d $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvbitclri.d(<4 x i64> %va, i32 1) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitrev-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitrev-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..dff0884fdd5aa8cefcd53e5c34b7bc01bb9f03b8 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitrev-invalid-imm.ll @@ -0,0 +1,65 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvbitrevi.b(<32 x i8>, i32) + +define <32 x i8> @lasx_xvbitrevi_b_lo(<32 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvbitrevi.b: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvbitrevi.b(<32 x i8> %va, i32 -1) + ret <32 x i8> %res +} + +define <32 x i8> @lasx_xvbitrevi_b_hi(<32 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvbitrevi.b: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvbitrevi.b(<32 x i8> %va, i32 8) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvbitrevi.h(<16 x i16>, i32) + +define <16 x i16> @lasx_xvbitrevi_h_lo(<16 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvbitrevi.h: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvbitrevi.h(<16 x i16> %va, i32 -1) + ret <16 x i16> %res +} + +define <16 x i16> @lasx_xvbitrevi_h_hi(<16 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvbitrevi.h: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvbitrevi.h(<16 x i16> %va, i32 16) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvbitrevi.w(<8 x i32>, i32) + +define <8 x i32> @lasx_xvbitrevi_w_lo(<8 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvbitrevi.w: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvbitrevi.w(<8 x i32> %va, i32 -1) + ret <8 x i32> %res +} + +define <8 x i32> @lasx_xvbitrevi_w_hi(<8 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvbitrevi.w: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvbitrevi.w(<8 x i32>
%va, i32 32) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvbitrevi.d(<4 x i64>, i32) + +define <4 x i64> @lasx_xvbitrevi_d_lo(<4 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvbitrevi.d: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvbitrevi.d(<4 x i64> %va, i32 -1) + ret <4 x i64> %res +} + +define <4 x i64> @lasx_xvbitrevi_d_hi(<4 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvbitrevi.d: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvbitrevi.d(<4 x i64> %va, i32 64) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitrev-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitrev-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..e1aef1a82f0c10b06953b6daa0450600bc762cbf --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitrev-non-imm.ll @@ -0,0 +1,37 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvbitrevi.b(<32 x i8>, i32) + +define <32 x i8> @lasx_xvbitrevi_b(<32 x i8> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvbitrevi.b(<32 x i8> %va, i32 %b) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvbitrevi.h(<16 x i16>, i32) + +define <16 x i16> @lasx_xvbitrevi_h(<16 x i16> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvbitrevi.h(<16 x i16> %va, i32 %b) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvbitrevi.w(<8 x i32>, i32) + +define <8 x i32> @lasx_xvbitrevi_w(<8 x i32> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvbitrevi.w(<8 x i32> %va, i32 %b) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvbitrevi.d(<4 x i64>, i32) + +define <4 x i64> @lasx_xvbitrevi_d(<4 x i64> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvbitrevi.d(<4 x i64> %va, i32 %b) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitrev.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitrev.ll new file mode 100644 index 0000000000000000000000000000000000000000..fb4f9fbc2e4b39ab85a979ee09135b2466f9da41 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitrev.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvbitrev.b(<32 x i8>, <32 x i8>) + +define <32 x i8> @lasx_xvbitrev_b(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvbitrev_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvbitrev.b $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvbitrev.b(<32 x i8> %va, <32 x i8> %vb) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvbitrev.h(<16 x i16>, <16 x i16>) + +define <16 x i16> @lasx_xvbitrev_h(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvbitrev_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvbitrev.h $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvbitrev.h(<16 x i16> %va, <16 x i16> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> 
@llvm.loongarch.lasx.xvbitrev.w(<8 x i32>, <8 x i32>) + +define <8 x i32> @lasx_xvbitrev_w(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvbitrev_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvbitrev.w $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvbitrev.w(<8 x i32> %va, <8 x i32> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvbitrev.d(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvbitrev_d(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvbitrev_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvbitrev.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvbitrev.d(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} + +declare <32 x i8> @llvm.loongarch.lasx.xvbitrevi.b(<32 x i8>, i32) + +define <32 x i8> @lasx_xvbitrevi_b(<32 x i8> %va) nounwind { +; CHECK-LABEL: lasx_xvbitrevi_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvbitrevi.b $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvbitrevi.b(<32 x i8> %va, i32 1) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvbitrevi.h(<16 x i16>, i32) + +define <16 x i16> @lasx_xvbitrevi_h(<16 x i16> %va) nounwind { +; CHECK-LABEL: lasx_xvbitrevi_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvbitrevi.h $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvbitrevi.h(<16 x i16> %va, i32 1) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvbitrevi.w(<8 x i32>, i32) + +define <8 x i32> @lasx_xvbitrevi_w(<8 x i32> %va) nounwind { +; CHECK-LABEL: lasx_xvbitrevi_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvbitrevi.w $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvbitrevi.w(<8 x i32> %va, i32 1) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvbitrevi.d(<4 x i64>, i32) + +define <4 x i64> @lasx_xvbitrevi_d(<4 x i64> %va) nounwind { +; CHECK-LABEL: lasx_xvbitrevi_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvbitrevi.d $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvbitrevi.d(<4 x i64> %va, i32 1) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitsel.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitsel.ll new file mode 100644 index 0000000000000000000000000000000000000000..2e91407590ac16d789270d7f6cb2abca8d8252a9 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitsel.ll @@ -0,0 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvbitsel.v(<32 x i8>, <32 x i8>, <32 x i8>) + +define <32 x i8> @lasx_xvbitsel_v(<32 x i8> %va, <32 x i8> %vb, <32 x i8> %vc) nounwind { +; CHECK-LABEL: lasx_xvbitsel_v: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvbitsel.v $xr0, $xr0, $xr1, $xr2 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvbitsel.v(<32 x i8> %va, <32 x i8> %vb, <32 x i8> %vc) + ret <32 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitseli-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitseli-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..3f6fd44f842c65669c1ec252d7368905104b9e8b --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitseli-invalid-imm.ll @@ -0,0 +1,17 @@ +; RUN: not llc --mtriple=loongarch64 
--mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvbitseli.b(<32 x i8>, <32 x i8>, i32) + +define <32 x i8> @lasx_xvbitseli_b_lo(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvbitseli.b: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvbitseli.b(<32 x i8> %va, <32 x i8> %vb, i32 -1) + ret <32 x i8> %res +} + +define <32 x i8> @lasx_xvbitseli_b_hi(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvbitseli.b: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvbitseli.b(<32 x i8> %va, <32 x i8> %vb, i32 256) + ret <32 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitseli-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitseli-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..40533ab96d86aa8b43fbecf04bd88bb95a4de470 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitseli-non-imm.ll @@ -0,0 +1,10 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvbitseli.b(<32 x i8>, <32 x i8>, i32) + +define <32 x i8> @lasx_xvbitseli_b(<32 x i8> %va, <32 x i8> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvbitseli.b(<32 x i8> %va, <32 x i8> %vb, i32 %c) + ret <32 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitseli.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitseli.ll new file mode 100644 index 0000000000000000000000000000000000000000..79dd55cbfef9881df9753d557c73fb93e9f647a0 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitseli.ll @@ -0,0 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvbitseli.b(<32 x i8>, <32 x i8>, i32) + +define <32 x i8> @lasx_xvbitseli_b(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvbitseli_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvbitseli.b $xr0, $xr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvbitseli.b(<32 x i8> %va, <32 x i8> %vb, i32 1) + ret <32 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitset-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitset-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..17a77ece7775b8d4e7f5ebc2add71f882a2025b7 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitset-invalid-imm.ll @@ -0,0 +1,65 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvbitseti.b(<32 x i8>, i32) + +define <32 x i8> @lasx_xvbitseti_b_lo(<32 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvbitseti.b: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvbitseti.b(<32 x i8> %va, i32 -1) + ret <32 x i8> %res +} + +define <32 x i8> @lasx_xvbitseti_b_hi(<32 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvbitseti.b: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvbitseti.b(<32 x i8> %va, i32 8) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvbitseti.h(<16 x i16>, i32) + +define <16 x i16> @lasx_xvbitseti_h_lo(<16 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvbitseti.h: argument out of range +entry: + %res = call <16 x i16> 
@llvm.loongarch.lasx.xvbitseti.h(<16 x i16> %va, i32 -1) + ret <16 x i16> %res +} + +define <16 x i16> @lasx_xvbitseti_h_hi(<16 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvbitseti.h: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvbitseti.h(<16 x i16> %va, i32 16) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvbitseti.w(<8 x i32>, i32) + +define <8 x i32> @lasx_xvbitseti_w_lo(<8 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvbitseti.w: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvbitseti.w(<8 x i32> %va, i32 -1) + ret <8 x i32> %res +} + +define <8 x i32> @lasx_xvbitseti_w_hi(<8 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvbitseti.w: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvbitseti.w(<8 x i32> %va, i32 32) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvbitseti.d(<4 x i64>, i32) + +define <4 x i64> @lasx_xvbitseti_d_lo(<4 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvbitseti.d: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvbitseti.d(<4 x i64> %va, i32 -1) + ret <4 x i64> %res +} + +define <4 x i64> @lasx_xvbitseti_d_hi(<4 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvbitseti.d: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvbitseti.d(<4 x i64> %va, i32 64) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitset-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitset-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..613285804e0e4b832110f1754c0361d105476f04 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitset-non-imm.ll @@ -0,0 +1,37 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvbitseti.b(<32 x i8>, i32) + +define <32 x i8> @lasx_xvbitseti_b(<32 x i8> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvbitseti.b(<32 x i8> %va, i32 %b) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvbitseti.h(<16 x i16>, i32) + +define <16 x i16> @lasx_xvbitseti_h(<16 x i16> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvbitseti.h(<16 x i16> %va, i32 %b) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvbitseti.w(<8 x i32>, i32) + +define <8 x i32> @lasx_xvbitseti_w(<8 x i32> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvbitseti.w(<8 x i32> %va, i32 %b) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvbitseti.d(<4 x i64>, i32) + +define <4 x i64> @lasx_xvbitseti_d(<4 x i64> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvbitseti.d(<4 x i64> %va, i32 %b) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitset.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitset.ll new file mode 100644 index 0000000000000000000000000000000000000000..83d1f0ef60c63c04a8f53e28d52ff2a0813734c2 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bitset.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < 
%s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvbitset.b(<32 x i8>, <32 x i8>) + +define <32 x i8> @lasx_xvbitset_b(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvbitset_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvbitset.b $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvbitset.b(<32 x i8> %va, <32 x i8> %vb) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvbitset.h(<16 x i16>, <16 x i16>) + +define <16 x i16> @lasx_xvbitset_h(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvbitset_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvbitset.h $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvbitset.h(<16 x i16> %va, <16 x i16> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvbitset.w(<8 x i32>, <8 x i32>) + +define <8 x i32> @lasx_xvbitset_w(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvbitset_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvbitset.w $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvbitset.w(<8 x i32> %va, <8 x i32> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvbitset.d(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvbitset_d(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvbitset_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvbitset.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvbitset.d(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} + +declare <32 x i8> @llvm.loongarch.lasx.xvbitseti.b(<32 x i8>, i32) + +define <32 x i8> @lasx_xvbitseti_b(<32 x i8> %va) nounwind { +; CHECK-LABEL: lasx_xvbitseti_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvbitseti.b $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvbitseti.b(<32 x i8> %va, i32 1) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvbitseti.h(<16 x i16>, i32) + +define <16 x i16> @lasx_xvbitseti_h(<16 x i16> %va) nounwind { +; CHECK-LABEL: lasx_xvbitseti_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvbitseti.h $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvbitseti.h(<16 x i16> %va, i32 1) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvbitseti.w(<8 x i32>, i32) + +define <8 x i32> @lasx_xvbitseti_w(<8 x i32> %va) nounwind { +; CHECK-LABEL: lasx_xvbitseti_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvbitseti.w $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvbitseti.w(<8 x i32> %va, i32 1) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvbitseti.d(<4 x i64>, i32) + +define <4 x i64> @lasx_xvbitseti_d(<4 x i64> %va) nounwind { +; CHECK-LABEL: lasx_xvbitseti_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvbitseti.d $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvbitseti.d(<4 x i64> %va, i32 1) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsll-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsll-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..1da08a633bd2b864fac441735216be54986c5721 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsll-invalid-imm.ll @@ -0,0 +1,17 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> 
@llvm.loongarch.lasx.xvbsll.v(<32 x i8>, i32) + +define <32 x i8> @lasx_xvbsll_v_lo(<32 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvbsll.v: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvbsll.v(<32 x i8> %va, i32 -1) + ret <32 x i8> %res +} + +define <32 x i8> @lasx_xvbsll_v_hi(<32 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvbsll.v: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvbsll.v(<32 x i8> %va, i32 32) + ret <32 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsll-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsll-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..e19a3232c179705aaae8d5f8678a7624b5122ed2 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsll-non-imm.ll @@ -0,0 +1,10 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvbsll.v(<32 x i8>, i32) + +define <32 x i8> @lasx_xvbsll_v(<32 x i8> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvbsll.v(<32 x i8> %va, i32 %b) + ret <32 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsll.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsll.ll new file mode 100644 index 0000000000000000000000000000000000000000..cbb63ced5cc00dd0b95f1ebc2e0337daad3a34c0 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsll.ll @@ -0,0 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvbsll.v(<32 x i8>, i32) + +define <32 x i8> @lasx_xvbsll_v(<32 x i8> %va) nounwind { +; CHECK-LABEL: lasx_xvbsll_v: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvbsll.v $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvbsll.v(<32 x i8> %va, i32 1) + ret <32 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsrl-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsrl-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..5d2b63391e677140fc67e56b128ca459595c31fa --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsrl-invalid-imm.ll @@ -0,0 +1,17 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvbsrl.v(<32 x i8>, i32) + +define <32 x i8> @lasx_xvbsrl_v_lo(<32 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvbsrl.v: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvbsrl.v(<32 x i8> %va, i32 -1) + ret <32 x i8> %res +} + +define <32 x i8> @lasx_xvbsrl_v_hi(<32 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvbsrl.v: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvbsrl.v(<32 x i8> %va, i32 32) + ret <32 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsrl-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsrl-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..8dfd0ca579b84f7bcfdffbcf3851062a4aa08ba8 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsrl-non-imm.ll @@ -0,0 +1,10 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvbsrl.v(<32 x i8>, i32) + +define <32 x i8> @lasx_xvbsrl_v(<32 x i8> %va, i32 %b) 
nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvbsrl.v(<32 x i8> %va, i32 %b) + ret <32 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsrl.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsrl.ll new file mode 100644 index 0000000000000000000000000000000000000000..b0c26cbe3e35c367874cb155c1aa5f464a8d2a23 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-bsrl.ll @@ -0,0 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvbsrl.v(<32 x i8>, i32) + +define <32 x i8> @lasx_xvbsrl_v(<32 x i8> %va) nounwind { +; CHECK-LABEL: lasx_xvbsrl_v: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvbsrl.v $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvbsrl.v(<32 x i8> %va, i32 1) + ret <32 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-clo.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-clo.ll new file mode 100644 index 0000000000000000000000000000000000000000..29b2be03d54eca44af2cac54e0007bdf0c3c059c --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-clo.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvclo.b(<32 x i8>) + +define <32 x i8> @lasx_xvclo_b(<32 x i8> %va) nounwind { +; CHECK-LABEL: lasx_xvclo_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvclo.b $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvclo.b(<32 x i8> %va) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvclo.h(<16 x i16>) + +define <16 x i16> @lasx_xvclo_h(<16 x i16> %va) nounwind { +; CHECK-LABEL: lasx_xvclo_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvclo.h $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvclo.h(<16 x i16> %va) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvclo.w(<8 x i32>) + +define <8 x i32> @lasx_xvclo_w(<8 x i32> %va) nounwind { +; CHECK-LABEL: lasx_xvclo_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvclo.w $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvclo.w(<8 x i32> %va) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvclo.d(<4 x i64>) + +define <4 x i64> @lasx_xvclo_d(<4 x i64> %va) nounwind { +; CHECK-LABEL: lasx_xvclo_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvclo.d $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvclo.d(<4 x i64> %va) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-clz.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-clz.ll new file mode 100644 index 0000000000000000000000000000000000000000..5247ceedbd146ee59edc6398631496c0166247b5 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-clz.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvclz.b(<32 x i8>) + +define <32 x i8> @lasx_xvclz_b(<32 x i8> %va) nounwind { +; CHECK-LABEL: lasx_xvclz_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvclz.b $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvclz.b(<32 x i8> %va) + ret 
<32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvclz.h(<16 x i16>) + +define <16 x i16> @lasx_xvclz_h(<16 x i16> %va) nounwind { +; CHECK-LABEL: lasx_xvclz_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvclz.h $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvclz.h(<16 x i16> %va) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvclz.w(<8 x i32>) + +define <8 x i32> @lasx_xvclz_w(<8 x i32> %va) nounwind { +; CHECK-LABEL: lasx_xvclz_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvclz.w $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvclz.w(<8 x i32> %va) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvclz.d(<4 x i64>) + +define <4 x i64> @lasx_xvclz_d(<4 x i64> %va) nounwind { +; CHECK-LABEL: lasx_xvclz_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvclz.d $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvclz.d(<4 x i64> %va) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-div.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-div.ll new file mode 100644 index 0000000000000000000000000000000000000000..813204092e944af1d27782d30f2a34d3c2200ca6 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-div.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvdiv.b(<32 x i8>, <32 x i8>) + +define <32 x i8> @lasx_xvdiv_b(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvdiv_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvdiv.b $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvdiv.b(<32 x i8> %va, <32 x i8> %vb) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvdiv.h(<16 x i16>, <16 x i16>) + +define <16 x i16> @lasx_xvdiv_h(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvdiv_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvdiv.h $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvdiv.h(<16 x i16> %va, <16 x i16> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvdiv.w(<8 x i32>, <8 x i32>) + +define <8 x i32> @lasx_xvdiv_w(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvdiv_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvdiv.w $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvdiv.w(<8 x i32> %va, <8 x i32> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvdiv.d(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvdiv_d(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvdiv_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvdiv.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvdiv.d(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} + +declare <32 x i8> @llvm.loongarch.lasx.xvdiv.bu(<32 x i8>, <32 x i8>) + +define <32 x i8> @lasx_xvdiv_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvdiv_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvdiv.bu $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvdiv.bu(<32 x i8> %va, <32 x i8> %vb) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvdiv.hu(<16 x i16>, <16 x i16>) + +define <16 x i16> @lasx_xvdiv_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { +; 
CHECK-LABEL: lasx_xvdiv_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvdiv.hu $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvdiv.hu(<16 x i16> %va, <16 x i16> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvdiv.wu(<8 x i32>, <8 x i32>) + +define <8 x i32> @lasx_xvdiv_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvdiv_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvdiv.wu $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvdiv.wu(<8 x i32> %va, <8 x i32> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvdiv.du(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvdiv_du(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvdiv_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvdiv.du $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvdiv.du(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ext2xv.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ext2xv.ll new file mode 100644 index 0000000000000000000000000000000000000000..48721b52af00913533887e382b3702cce55e3953 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ext2xv.ll @@ -0,0 +1,146 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <16 x i16> @llvm.loongarch.lasx.vext2xv.h.b(<32 x i8>) + +define <16 x i16> @lasx_vext2xv_h_b(<32 x i8> %va) nounwind { +; CHECK-LABEL: lasx_vext2xv_h_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vext2xv.h.b $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.vext2xv.h.b(<32 x i8> %va) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.vext2xv.w.b(<32 x i8>) + +define <8 x i32> @lasx_vext2xv_w_b(<32 x i8> %va) nounwind { +; CHECK-LABEL: lasx_vext2xv_w_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vext2xv.w.b $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.b(<32 x i8> %va) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.vext2xv.d.b(<32 x i8>) + +define <4 x i64> @lasx_vext2xv_d_b(<32 x i8> %va) nounwind { +; CHECK-LABEL: lasx_vext2xv_d_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vext2xv.d.b $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.b(<32 x i8> %va) + ret <4 x i64> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.vext2xv.w.h(<16 x i16>) + +define <8 x i32> @lasx_vext2xv_w_h(<16 x i16> %va) nounwind { +; CHECK-LABEL: lasx_vext2xv_w_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vext2xv.w.h $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.h(<16 x i16> %va) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.vext2xv.d.h(<16 x i16>) + +define <4 x i64> @lasx_vext2xv_d_h(<16 x i16> %va) nounwind { +; CHECK-LABEL: lasx_vext2xv_d_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vext2xv.d.h $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.h(<16 x i16> %va) + ret <4 x i64> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.vext2xv.d.w(<8 x i32>) + +define <4 x i64> @lasx_vext2xv_d_w(<8 x i32> %va) nounwind { +; CHECK-LABEL: lasx_vext2xv_d_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vext2xv.d.w $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> 
@llvm.loongarch.lasx.vext2xv.d.w(<8 x i32> %va) + ret <4 x i64> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.vext2xv.hu.bu(<32 x i8>) + +define <16 x i16> @lasx_vext2xv_hu_bu(<32 x i8> %va) nounwind { +; CHECK-LABEL: lasx_vext2xv_hu_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vext2xv.hu.bu $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.vext2xv.hu.bu(<32 x i8> %va) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.bu(<32 x i8>) + +define <8 x i32> @lasx_vext2xv_wu_bu(<32 x i8> %va) nounwind { +; CHECK-LABEL: lasx_vext2xv_wu_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vext2xv.wu.bu $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.bu(<32 x i8> %va) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.vext2xv.du.bu(<32 x i8>) + +define <4 x i64> @lasx_vext2xv_du_bu(<32 x i8> %va) nounwind { +; CHECK-LABEL: lasx_vext2xv_du_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vext2xv.du.bu $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.bu(<32 x i8> %va) + ret <4 x i64> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.hu(<16 x i16>) + +define <8 x i32> @lasx_vext2xv_wu_hu(<16 x i16> %va) nounwind { +; CHECK-LABEL: lasx_vext2xv_wu_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vext2xv.wu.hu $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.hu(<16 x i16> %va) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.vext2xv.du.hu(<16 x i16>) + +define <4 x i64> @lasx_vext2xv_du_hu(<16 x i16> %va) nounwind { +; CHECK-LABEL: lasx_vext2xv_du_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vext2xv.du.hu $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.hu(<16 x i16> %va) + ret <4 x i64> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.vext2xv.du.wu(<8 x i32>) + +define <4 x i64> @lasx_vext2xv_du_wu(<8 x i32> %va) nounwind { +; CHECK-LABEL: lasx_vext2xv_du_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vext2xv.du.wu $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.wu(<8 x i32> %va) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-exth.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-exth.ll new file mode 100644 index 0000000000000000000000000000000000000000..543589e61b12f7b5b8b420769694c4beb4212110 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-exth.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <16 x i16> @llvm.loongarch.lasx.xvexth.h.b(<32 x i8>) + +define <16 x i16> @lasx_xvexth_h_b(<32 x i8> %va) nounwind { +; CHECK-LABEL: lasx_xvexth_h_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvexth.h.b $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvexth.h.b(<32 x i8> %va) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvexth.w.h(<16 x i16>) + +define <8 x i32> @lasx_xvexth_w_h(<16 x i16> %va) nounwind { +; CHECK-LABEL: lasx_xvexth_w_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvexth.w.h $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvexth.w.h(<16 x i16> %va) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvexth.d.w(<8 x i32>) + +define <4 x i64> @lasx_xvexth_d_w(<8 x i32> %va) nounwind { +; 
CHECK-LABEL: lasx_xvexth_d_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvexth.d.w $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvexth.d.w(<8 x i32> %va) + ret <4 x i64> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvexth.q.d(<4 x i64>) + +define <4 x i64> @lasx_xvexth_q_d(<4 x i64> %va) nounwind { +; CHECK-LABEL: lasx_xvexth_q_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvexth.q.d $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvexth.q.d(<4 x i64> %va) + ret <4 x i64> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvexth.hu.bu(<32 x i8>) + +define <16 x i16> @lasx_xvexth_hu_bu(<32 x i8> %va) nounwind { +; CHECK-LABEL: lasx_xvexth_hu_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvexth.hu.bu $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvexth.hu.bu(<32 x i8> %va) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvexth.wu.hu(<16 x i16>) + +define <8 x i32> @lasx_xvexth_wu_hu(<16 x i16> %va) nounwind { +; CHECK-LABEL: lasx_xvexth_wu_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvexth.wu.hu $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvexth.wu.hu(<16 x i16> %va) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvexth.du.wu(<8 x i32>) + +define <4 x i64> @lasx_xvexth_du_wu(<8 x i32> %va) nounwind { +; CHECK-LABEL: lasx_xvexth_du_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvexth.du.wu $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvexth.du.wu(<8 x i32> %va) + ret <4 x i64> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvexth.qu.du(<4 x i64>) + +define <4 x i64> @lasx_xvexth_qu_du(<4 x i64> %va) nounwind { +; CHECK-LABEL: lasx_xvexth_qu_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvexth.qu.du $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvexth.qu.du(<4 x i64> %va) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-extl.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-extl.ll new file mode 100644 index 0000000000000000000000000000000000000000..7040c8c784cdfea6d47b5c003febf3895bcae2c2 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-extl.ll @@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <4 x i64> @llvm.loongarch.lasx.xvextl.q.d(<4 x i64>) + +define <4 x i64> @lasx_xvextl_q_d(<4 x i64> %va) nounwind { +; CHECK-LABEL: lasx_xvextl_q_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvextl.q.d $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvextl.q.d(<4 x i64> %va) + ret <4 x i64> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvextl.qu.du(<4 x i64>) + +define <4 x i64> @lasx_xvextl_qu_du(<4 x i64> %va) nounwind { +; CHECK-LABEL: lasx_xvextl_qu_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvextl.qu.du $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvextl.qu.du(<4 x i64> %va) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-extrins-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-extrins-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..1301b8a146eb7d334e120433f6d041fac4121d75 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-extrins-invalid-imm.ll @@ -0,0 +1,65 @@ +; RUN: not llc 
--mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvextrins.b(<32 x i8>, <32 x i8>, i32) + +define <32 x i8> @lasx_xvextrins_b_lo(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvextrins.b: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvextrins.b(<32 x i8> %va, <32 x i8> %vb, i32 -1) + ret <32 x i8> %res +} + +define <32 x i8> @lasx_xvextrins_b_hi(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvextrins.b: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvextrins.b(<32 x i8> %va, <32 x i8> %vb, i32 256) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvextrins.h(<16 x i16>, <16 x i16>, i32) + +define <16 x i16> @lasx_xvextrins_h_lo(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvextrins.h: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvextrins.h(<16 x i16> %va, <16 x i16> %vb, i32 -1) + ret <16 x i16> %res +} + +define <16 x i16> @lasx_xvextrins_h_hi(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvextrins.h: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvextrins.h(<16 x i16> %va, <16 x i16> %vb, i32 256) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvextrins.w(<8 x i32>, <8 x i32>, i32) + +define <8 x i32> @lasx_xvextrins_w_lo(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvextrins.w: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvextrins.w(<8 x i32> %va, <8 x i32> %vb, i32 -1) + ret <8 x i32> %res +} + +define <8 x i32> @lasx_xvextrins_w_hi(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvextrins.w: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvextrins.w(<8 x i32> %va, <8 x i32> %vb, i32 256) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvextrins.d(<4 x i64>, <4 x i64>, i32) + +define <4 x i64> @lasx_xvextrins_d_lo(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvextrins.d: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvextrins.d(<4 x i64> %va, <4 x i64> %vb, i32 -1) + ret <4 x i64> %res +} + +define <4 x i64> @lasx_xvextrins_d_hi(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvextrins.d: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvextrins.d(<4 x i64> %va, <4 x i64> %vb, i32 256) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-extrins-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-extrins-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..bca8f8b3c778fbd2d43245c536cfc0e2d2d4a768 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-extrins-non-imm.ll @@ -0,0 +1,37 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvextrins.b(<32 x i8>, <32 x i8>, i32) + +define <32 x i8> @lasx_xvextrins_b(<32 x i8> %va, <32 x i8> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvextrins.b(<32 x i8> %va, <32 x i8> %vb, i32 %c) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvextrins.h(<16 x i16>, <16 x i16>, i32) + +define <16 x i16> @lasx_xvextrins_h(<16 x i16> %va, <16 x i16> %vb, i32 %c) nounwind { +; CHECK: 
immarg operand has non-immediate parameter +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvextrins.h(<16 x i16> %va, <16 x i16> %vb, i32 %c) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvextrins.w(<8 x i32>, <8 x i32>, i32) + +define <8 x i32> @lasx_xvextrins_w(<8 x i32> %va, <8 x i32> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvextrins.w(<8 x i32> %va, <8 x i32> %vb, i32 %c) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvextrins.d(<4 x i64>, <4 x i64>, i32) + +define <4 x i64> @lasx_xvextrins_d(<4 x i64> %va, <4 x i64> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvextrins.d(<4 x i64> %va, <4 x i64> %vb, i32 %c) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-extrins.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-extrins.ll new file mode 100644 index 0000000000000000000000000000000000000000..c8774a7b29c0b5f5463c6a656441ca3d77aa82f9 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-extrins.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvextrins.b(<32 x i8>, <32 x i8>, i32) + +define <32 x i8> @lasx_xvextrins_b(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvextrins_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvextrins.b $xr0, $xr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvextrins.b(<32 x i8> %va, <32 x i8> %vb, i32 1) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvextrins.h(<16 x i16>, <16 x i16>, i32) + +define <16 x i16> @lasx_xvextrins_h(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvextrins_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvextrins.h $xr0, $xr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvextrins.h(<16 x i16> %va, <16 x i16> %vb, i32 1) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvextrins.w(<8 x i32>, <8 x i32>, i32) + +define <8 x i32> @lasx_xvextrins_w(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvextrins_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvextrins.w $xr0, $xr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvextrins.w(<8 x i32> %va, <8 x i32> %vb, i32 1) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvextrins.d(<4 x i64>, <4 x i64>, i32) + +define <4 x i64> @lasx_xvextrins_d(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvextrins_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvextrins.d $xr0, $xr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvextrins.d(<4 x i64> %va, <4 x i64> %vb, i32 1) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fadd.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fadd.ll new file mode 100644 index 0000000000000000000000000000000000000000..563a0ce9e384d5c6e53d4c9ffec243ff7d835d58 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fadd.ll @@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <8 x float> @llvm.loongarch.lasx.xvfadd.s(<8 x float>, <8 x float>) + +define <8 x float> 
@lasx_xvfadd_s(<8 x float> %va, <8 x float> %vb) nounwind { +; CHECK-LABEL: lasx_xvfadd_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfadd.s $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x float> @llvm.loongarch.lasx.xvfadd.s(<8 x float> %va, <8 x float> %vb) + ret <8 x float> %res +} + +declare <4 x double> @llvm.loongarch.lasx.xvfadd.d(<4 x double>, <4 x double>) + +define <4 x double> @lasx_xvfadd_d(<4 x double> %va, <4 x double> %vb) nounwind { +; CHECK-LABEL: lasx_xvfadd_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfadd.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x double> @llvm.loongarch.lasx.xvfadd.d(<4 x double> %va, <4 x double> %vb) + ret <4 x double> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fclass.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fclass.ll new file mode 100644 index 0000000000000000000000000000000000000000..901ca5bb026019469430bc1ae8f6b6fc1d1d7975 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fclass.ll @@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <8 x i32> @llvm.loongarch.lasx.xvfclass.s(<8 x float>) + +define <8 x i32> @lasx_xvfclass_s(<8 x float> %va) nounwind { +; CHECK-LABEL: lasx_xvfclass_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfclass.s $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvfclass.s(<8 x float> %va) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvfclass.d(<4 x double>) + +define <4 x i64> @lasx_xvfclass_d(<4 x double> %va) nounwind { +; CHECK-LABEL: lasx_xvfclass_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfclass.d $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvfclass.d(<4 x double> %va) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcmp.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcmp.ll new file mode 100644 index 0000000000000000000000000000000000000000..b01f908e71af5b43e337ad8f76c7f4595bcb4106 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcmp.ll @@ -0,0 +1,530 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.caf.s(<8 x float>, <8 x float>) + +define <8 x i32> @lasx_xvfcmp_caf_s(<8 x float> %va, <8 x float> %vb) nounwind { +; CHECK-LABEL: lasx_xvfcmp_caf_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfcmp.caf.s $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.caf.s(<8 x float> %va, <8 x float> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.caf.d(<4 x double>, <4 x double>) + +define <4 x i64> @lasx_xvfcmp_caf_d(<4 x double> %va, <4 x double> %vb) nounwind { +; CHECK-LABEL: lasx_xvfcmp_caf_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfcmp.caf.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.caf.d(<4 x double> %va, <4 x double> %vb) + ret <4 x i64> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.cun.s(<8 x float>, <8 x float>) + +define <8 x i32> @lasx_xvfcmp_cun_s(<8 x float> %va, <8 x float> %vb) nounwind { +; CHECK-LABEL: lasx_xvfcmp_cun_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfcmp.cun.s $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> 
@llvm.loongarch.lasx.xvfcmp.cun.s(<8 x float> %va, <8 x float> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.cun.d(<4 x double>, <4 x double>) + +define <4 x i64> @lasx_xvfcmp_cun_d(<4 x double> %va, <4 x double> %vb) nounwind { +; CHECK-LABEL: lasx_xvfcmp_cun_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfcmp.cun.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cun.d(<4 x double> %va, <4 x double> %vb) + ret <4 x i64> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.ceq.s(<8 x float>, <8 x float>) + +define <8 x i32> @lasx_xvfcmp_ceq_s(<8 x float> %va, <8 x float> %vb) nounwind { +; CHECK-LABEL: lasx_xvfcmp_ceq_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfcmp.ceq.s $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.ceq.s(<8 x float> %va, <8 x float> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.ceq.d(<4 x double>, <4 x double>) + +define <4 x i64> @lasx_xvfcmp_ceq_d(<4 x double> %va, <4 x double> %vb) nounwind { +; CHECK-LABEL: lasx_xvfcmp_ceq_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfcmp.ceq.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.ceq.d(<4 x double> %va, <4 x double> %vb) + ret <4 x i64> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.cueq.s(<8 x float>, <8 x float>) + +define <8 x i32> @lasx_xvfcmp_cueq_s(<8 x float> %va, <8 x float> %vb) nounwind { +; CHECK-LABEL: lasx_xvfcmp_cueq_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfcmp.cueq.s $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cueq.s(<8 x float> %va, <8 x float> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.cueq.d(<4 x double>, <4 x double>) + +define <4 x i64> @lasx_xvfcmp_cueq_d(<4 x double> %va, <4 x double> %vb) nounwind { +; CHECK-LABEL: lasx_xvfcmp_cueq_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfcmp.cueq.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cueq.d(<4 x double> %va, <4 x double> %vb) + ret <4 x i64> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.clt.s(<8 x float>, <8 x float>) + +define <8 x i32> @lasx_xvfcmp_clt_s(<8 x float> %va, <8 x float> %vb) nounwind { +; CHECK-LABEL: lasx_xvfcmp_clt_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfcmp.clt.s $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.clt.s(<8 x float> %va, <8 x float> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.clt.d(<4 x double>, <4 x double>) + +define <4 x i64> @lasx_xvfcmp_clt_d(<4 x double> %va, <4 x double> %vb) nounwind { +; CHECK-LABEL: lasx_xvfcmp_clt_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfcmp.clt.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.clt.d(<4 x double> %va, <4 x double> %vb) + ret <4 x i64> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.cult.s(<8 x float>, <8 x float>) + +define <8 x i32> @lasx_xvfcmp_cult_s(<8 x float> %va, <8 x float> %vb) nounwind { +; CHECK-LABEL: lasx_xvfcmp_cult_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfcmp.cult.s $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cult.s(<8 x float> %va, <8 x float> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.cult.d(<4 x double>, <4 x double>) + +define 
<4 x i64> @lasx_xvfcmp_cult_d(<4 x double> %va, <4 x double> %vb) nounwind { +; CHECK-LABEL: lasx_xvfcmp_cult_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfcmp.cult.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cult.d(<4 x double> %va, <4 x double> %vb) + ret <4 x i64> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.cle.s(<8 x float>, <8 x float>) + +define <8 x i32> @lasx_xvfcmp_cle_s(<8 x float> %va, <8 x float> %vb) nounwind { +; CHECK-LABEL: lasx_xvfcmp_cle_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfcmp.cle.s $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cle.s(<8 x float> %va, <8 x float> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.cle.d(<4 x double>, <4 x double>) + +define <4 x i64> @lasx_xvfcmp_cle_d(<4 x double> %va, <4 x double> %vb) nounwind { +; CHECK-LABEL: lasx_xvfcmp_cle_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfcmp.cle.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cle.d(<4 x double> %va, <4 x double> %vb) + ret <4 x i64> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.cule.s(<8 x float>, <8 x float>) + +define <8 x i32> @lasx_xvfcmp_cule_s(<8 x float> %va, <8 x float> %vb) nounwind { +; CHECK-LABEL: lasx_xvfcmp_cule_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfcmp.cule.s $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cule.s(<8 x float> %va, <8 x float> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.cule.d(<4 x double>, <4 x double>) + +define <4 x i64> @lasx_xvfcmp_cule_d(<4 x double> %va, <4 x double> %vb) nounwind { +; CHECK-LABEL: lasx_xvfcmp_cule_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfcmp.cule.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cule.d(<4 x double> %va, <4 x double> %vb) + ret <4 x i64> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.cne.s(<8 x float>, <8 x float>) + +define <8 x i32> @lasx_xvfcmp_cne_s(<8 x float> %va, <8 x float> %vb) nounwind { +; CHECK-LABEL: lasx_xvfcmp_cne_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfcmp.cne.s $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cne.s(<8 x float> %va, <8 x float> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.cne.d(<4 x double>, <4 x double>) + +define <4 x i64> @lasx_xvfcmp_cne_d(<4 x double> %va, <4 x double> %vb) nounwind { +; CHECK-LABEL: lasx_xvfcmp_cne_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfcmp.cne.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cne.d(<4 x double> %va, <4 x double> %vb) + ret <4 x i64> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.cor.s(<8 x float>, <8 x float>) + +define <8 x i32> @lasx_xvfcmp_cor_s(<8 x float> %va, <8 x float> %vb) nounwind { +; CHECK-LABEL: lasx_xvfcmp_cor_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfcmp.cor.s $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cor.s(<8 x float> %va, <8 x float> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.cor.d(<4 x double>, <4 x double>) + +define <4 x i64> @lasx_xvfcmp_cor_d(<4 x double> %va, <4 x double> %vb) nounwind { +; CHECK-LABEL: lasx_xvfcmp_cor_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfcmp.cor.d $xr0, $xr0, $xr1 +; 
CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cor.d(<4 x double> %va, <4 x double> %vb) + ret <4 x i64> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.cune.s(<8 x float>, <8 x float>) + +define <8 x i32> @lasx_xvfcmp_cune_s(<8 x float> %va, <8 x float> %vb) nounwind { +; CHECK-LABEL: lasx_xvfcmp_cune_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfcmp.cune.s $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cune.s(<8 x float> %va, <8 x float> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.cune.d(<4 x double>, <4 x double>) + +define <4 x i64> @lasx_xvfcmp_cune_d(<4 x double> %va, <4 x double> %vb) nounwind { +; CHECK-LABEL: lasx_xvfcmp_cune_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfcmp.cune.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cune.d(<4 x double> %va, <4 x double> %vb) + ret <4 x i64> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.saf.s(<8 x float>, <8 x float>) + +define <8 x i32> @lasx_xvfcmp_saf_s(<8 x float> %va, <8 x float> %vb) nounwind { +; CHECK-LABEL: lasx_xvfcmp_saf_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfcmp.saf.s $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.saf.s(<8 x float> %va, <8 x float> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.saf.d(<4 x double>, <4 x double>) + +define <4 x i64> @lasx_xvfcmp_saf_d(<4 x double> %va, <4 x double> %vb) nounwind { +; CHECK-LABEL: lasx_xvfcmp_saf_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfcmp.saf.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.saf.d(<4 x double> %va, <4 x double> %vb) + ret <4 x i64> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.sun.s(<8 x float>, <8 x float>) + +define <8 x i32> @lasx_xvfcmp_sun_s(<8 x float> %va, <8 x float> %vb) nounwind { +; CHECK-LABEL: lasx_xvfcmp_sun_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfcmp.sun.s $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sun.s(<8 x float> %va, <8 x float> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.sun.d(<4 x double>, <4 x double>) + +define <4 x i64> @lasx_xvfcmp_sun_d(<4 x double> %va, <4 x double> %vb) nounwind { +; CHECK-LABEL: lasx_xvfcmp_sun_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfcmp.sun.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sun.d(<4 x double> %va, <4 x double> %vb) + ret <4 x i64> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.seq.s(<8 x float>, <8 x float>) + +define <8 x i32> @lasx_xvfcmp_seq_s(<8 x float> %va, <8 x float> %vb) nounwind { +; CHECK-LABEL: lasx_xvfcmp_seq_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfcmp.seq.s $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.seq.s(<8 x float> %va, <8 x float> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.seq.d(<4 x double>, <4 x double>) + +define <4 x i64> @lasx_xvfcmp_seq_d(<4 x double> %va, <4 x double> %vb) nounwind { +; CHECK-LABEL: lasx_xvfcmp_seq_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfcmp.seq.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.seq.d(<4 x double> %va, <4 x double> %vb) + ret <4 x i64> %res +} + +declare <8 x i32> 
@llvm.loongarch.lasx.xvfcmp.sueq.s(<8 x float>, <8 x float>) + +define <8 x i32> @lasx_xvfcmp_sueq_s(<8 x float> %va, <8 x float> %vb) nounwind { +; CHECK-LABEL: lasx_xvfcmp_sueq_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfcmp.sueq.s $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sueq.s(<8 x float> %va, <8 x float> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.sueq.d(<4 x double>, <4 x double>) + +define <4 x i64> @lasx_xvfcmp_sueq_d(<4 x double> %va, <4 x double> %vb) nounwind { +; CHECK-LABEL: lasx_xvfcmp_sueq_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfcmp.sueq.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sueq.d(<4 x double> %va, <4 x double> %vb) + ret <4 x i64> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.slt.s(<8 x float>, <8 x float>) + +define <8 x i32> @lasx_xvfcmp_slt_s(<8 x float> %va, <8 x float> %vb) nounwind { +; CHECK-LABEL: lasx_xvfcmp_slt_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfcmp.slt.s $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.slt.s(<8 x float> %va, <8 x float> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.slt.d(<4 x double>, <4 x double>) + +define <4 x i64> @lasx_xvfcmp_slt_d(<4 x double> %va, <4 x double> %vb) nounwind { +; CHECK-LABEL: lasx_xvfcmp_slt_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfcmp.slt.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.slt.d(<4 x double> %va, <4 x double> %vb) + ret <4 x i64> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.sult.s(<8 x float>, <8 x float>) + +define <8 x i32> @lasx_xvfcmp_sult_s(<8 x float> %va, <8 x float> %vb) nounwind { +; CHECK-LABEL: lasx_xvfcmp_sult_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfcmp.sult.s $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sult.s(<8 x float> %va, <8 x float> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.sult.d(<4 x double>, <4 x double>) + +define <4 x i64> @lasx_xvfcmp_sult_d(<4 x double> %va, <4 x double> %vb) nounwind { +; CHECK-LABEL: lasx_xvfcmp_sult_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfcmp.sult.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sult.d(<4 x double> %va, <4 x double> %vb) + ret <4 x i64> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.sle.s(<8 x float>, <8 x float>) + +define <8 x i32> @lasx_xvfcmp_sle_s(<8 x float> %va, <8 x float> %vb) nounwind { +; CHECK-LABEL: lasx_xvfcmp_sle_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfcmp.sle.s $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sle.s(<8 x float> %va, <8 x float> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.sle.d(<4 x double>, <4 x double>) + +define <4 x i64> @lasx_xvfcmp_sle_d(<4 x double> %va, <4 x double> %vb) nounwind { +; CHECK-LABEL: lasx_xvfcmp_sle_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfcmp.sle.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sle.d(<4 x double> %va, <4 x double> %vb) + ret <4 x i64> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.sule.s(<8 x float>, <8 x float>) + +define <8 x i32> @lasx_xvfcmp_sule_s(<8 x float> %va, <8 x float> %vb) nounwind { +; CHECK-LABEL: lasx_xvfcmp_sule_s: 
+; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfcmp.sule.s $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sule.s(<8 x float> %va, <8 x float> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.sule.d(<4 x double>, <4 x double>) + +define <4 x i64> @lasx_xvfcmp_sule_d(<4 x double> %va, <4 x double> %vb) nounwind { +; CHECK-LABEL: lasx_xvfcmp_sule_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfcmp.sule.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sule.d(<4 x double> %va, <4 x double> %vb) + ret <4 x i64> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.sne.s(<8 x float>, <8 x float>) + +define <8 x i32> @lasx_xvfcmp_sne_s(<8 x float> %va, <8 x float> %vb) nounwind { +; CHECK-LABEL: lasx_xvfcmp_sne_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfcmp.sne.s $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sne.s(<8 x float> %va, <8 x float> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.sne.d(<4 x double>, <4 x double>) + +define <4 x i64> @lasx_xvfcmp_sne_d(<4 x double> %va, <4 x double> %vb) nounwind { +; CHECK-LABEL: lasx_xvfcmp_sne_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfcmp.sne.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sne.d(<4 x double> %va, <4 x double> %vb) + ret <4 x i64> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.sor.s(<8 x float>, <8 x float>) + +define <8 x i32> @lasx_xvfcmp_sor_s(<8 x float> %va, <8 x float> %vb) nounwind { +; CHECK-LABEL: lasx_xvfcmp_sor_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfcmp.sor.s $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sor.s(<8 x float> %va, <8 x float> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.sor.d(<4 x double>, <4 x double>) + +define <4 x i64> @lasx_xvfcmp_sor_d(<4 x double> %va, <4 x double> %vb) nounwind { +; CHECK-LABEL: lasx_xvfcmp_sor_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfcmp.sor.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sor.d(<4 x double> %va, <4 x double> %vb) + ret <4 x i64> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvfcmp.sune.s(<8 x float>, <8 x float>) + +define <8 x i32> @lasx_xvfcmp_sune_s(<8 x float> %va, <8 x float> %vb) nounwind { +; CHECK-LABEL: lasx_xvfcmp_sune_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfcmp.sune.s $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sune.s(<8 x float> %va, <8 x float> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvfcmp.sune.d(<4 x double>, <4 x double>) + +define <4 x i64> @lasx_xvfcmp_sune_d(<4 x double> %va, <4 x double> %vb) nounwind { +; CHECK-LABEL: lasx_xvfcmp_sune_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfcmp.sune.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sune.d(<4 x double> %va, <4 x double> %vb) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcvt.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcvt.ll new file mode 100644 index 0000000000000000000000000000000000000000..82bf1d3df72c6c2f87550b5b492fa2039b44f9b0 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcvt.ll @@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <16 x i16> @llvm.loongarch.lasx.xvfcvt.h.s(<8 x float>, <8 x float>) + +define <16 x i16> @lasx_xvfcvt_h_s(<8 x float> %va, <8 x float> %vb) nounwind { +; CHECK-LABEL: lasx_xvfcvt_h_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfcvt.h.s $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvfcvt.h.s(<8 x float> %va, <8 x float> %vb) + ret <16 x i16> %res +} + +declare <8 x float> @llvm.loongarch.lasx.xvfcvt.s.d(<4 x double>, <4 x double>) + +define <8 x float> @lasx_xvfcvt_s_d(<4 x double> %va, <4 x double> %vb) nounwind { +; CHECK-LABEL: lasx_xvfcvt_s_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfcvt.s.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x float> @llvm.loongarch.lasx.xvfcvt.s.d(<4 x double> %va, <4 x double> %vb) + ret <8 x float> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcvth.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcvth.ll new file mode 100644 index 0000000000000000000000000000000000000000..e1a6a2923e6770505473939238719fa00f4eb963 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcvth.ll @@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <8 x float> @llvm.loongarch.lasx.xvfcvth.s.h(<16 x i16>) + +define <8 x float> @lasx_xvfcvth_s_h(<16 x i16> %va) nounwind { +; CHECK-LABEL: lasx_xvfcvth_s_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfcvth.s.h $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <8 x float> @llvm.loongarch.lasx.xvfcvth.s.h(<16 x i16> %va) + ret <8 x float> %res +} + +declare <4 x double> @llvm.loongarch.lasx.xvfcvth.d.s(<8 x float>) + +define <4 x double> @lasx_xvfcvth_d_s(<8 x float> %va) nounwind { +; CHECK-LABEL: lasx_xvfcvth_d_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfcvth.d.s $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x double> @llvm.loongarch.lasx.xvfcvth.d.s(<8 x float> %va) + ret <4 x double> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcvtl.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcvtl.ll new file mode 100644 index 0000000000000000000000000000000000000000..0b3e693c7f51de7f08e05556b4a6caca41c67800 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fcvtl.ll @@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <8 x float> @llvm.loongarch.lasx.xvfcvtl.s.h(<16 x i16>) + +define <8 x float> @lasx_xvfcvtl_s_h(<16 x i16> %va) nounwind { +; CHECK-LABEL: lasx_xvfcvtl_s_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfcvtl.s.h $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <8 x float> @llvm.loongarch.lasx.xvfcvtl.s.h(<16 x i16> %va) + ret <8 x float> %res +} + +declare <4 x double> @llvm.loongarch.lasx.xvfcvtl.d.s(<8 x float>) + +define <4 x double> @lasx_xvfcvtl_d_s(<8 x float> %va) nounwind { +; CHECK-LABEL: lasx_xvfcvtl_d_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfcvtl.d.s $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x double> @llvm.loongarch.lasx.xvfcvtl.d.s(<8 x float> %va) + ret <4 x double> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fdiv.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fdiv.ll new file mode 100644 index 0000000000000000000000000000000000000000..49923ddd4e8dec5c65296b9177c34549e00e42d2 
--- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fdiv.ll @@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <8 x float> @llvm.loongarch.lasx.xvfdiv.s(<8 x float>, <8 x float>) + +define <8 x float> @lasx_xvfdiv_s(<8 x float> %va, <8 x float> %vb) nounwind { +; CHECK-LABEL: lasx_xvfdiv_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfdiv.s $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x float> @llvm.loongarch.lasx.xvfdiv.s(<8 x float> %va, <8 x float> %vb) + ret <8 x float> %res +} + +declare <4 x double> @llvm.loongarch.lasx.xvfdiv.d(<4 x double>, <4 x double>) + +define <4 x double> @lasx_xvfdiv_d(<4 x double> %va, <4 x double> %vb) nounwind { +; CHECK-LABEL: lasx_xvfdiv_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfdiv.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x double> @llvm.loongarch.lasx.xvfdiv.d(<4 x double> %va, <4 x double> %vb) + ret <4 x double> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ffint.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ffint.ll new file mode 100644 index 0000000000000000000000000000000000000000..24da0bd3383877fc88e1e9a310f49ce167641a7c --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ffint.ll @@ -0,0 +1,86 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <8 x float> @llvm.loongarch.lasx.xvffint.s.w(<8 x i32>) + +define <8 x float> @lasx_xvffint_s_w(<8 x i32> %va) nounwind { +; CHECK-LABEL: lasx_xvffint_s_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvffint.s.w $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <8 x float> @llvm.loongarch.lasx.xvffint.s.w(<8 x i32> %va) + ret <8 x float> %res +} + +declare <4 x double> @llvm.loongarch.lasx.xvffint.d.l(<4 x i64>) + +define <4 x double> @lasx_xvffint_d_l(<4 x i64> %va) nounwind { +; CHECK-LABEL: lasx_xvffint_d_l: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvffint.d.l $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x double> @llvm.loongarch.lasx.xvffint.d.l(<4 x i64> %va) + ret <4 x double> %res +} + +declare <8 x float> @llvm.loongarch.lasx.xvffint.s.wu(<8 x i32>) + +define <8 x float> @lasx_xvffint_s_wu(<8 x i32> %va) nounwind { +; CHECK-LABEL: lasx_xvffint_s_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvffint.s.wu $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <8 x float> @llvm.loongarch.lasx.xvffint.s.wu(<8 x i32> %va) + ret <8 x float> %res +} + +declare <4 x double> @llvm.loongarch.lasx.xvffint.d.lu(<4 x i64>) + +define <4 x double> @lasx_xvffint_d_lu(<4 x i64> %va) nounwind { +; CHECK-LABEL: lasx_xvffint_d_lu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvffint.d.lu $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x double> @llvm.loongarch.lasx.xvffint.d.lu(<4 x i64> %va) + ret <4 x double> %res +} + +declare <4 x double> @llvm.loongarch.lasx.xvffintl.d.w(<8 x i32>) + +define <4 x double> @lasx_xvffintl_d_w(<8 x i32> %va) nounwind { +; CHECK-LABEL: lasx_xvffintl_d_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvffintl.d.w $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x double> @llvm.loongarch.lasx.xvffintl.d.w(<8 x i32> %va) + ret <4 x double> %res +} + +declare <4 x double> @llvm.loongarch.lasx.xvffinth.d.w(<8 x i32>) + +define <4 x double> @lasx_xvffinth_d_w(<8 x i32> %va) nounwind { +; CHECK-LABEL: lasx_xvffinth_d_w: +; CHECK: # %bb.0: # %entry +; 
CHECK-NEXT: xvffinth.d.w $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x double> @llvm.loongarch.lasx.xvffinth.d.w(<8 x i32> %va) + ret <4 x double> %res +} + +declare <8 x float> @llvm.loongarch.lasx.xvffint.s.l(<4 x i64>, <4 x i64>) + +define <8 x float> @lasx_xvffint_s_l(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvffint_s_l: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvffint.s.l $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x float> @llvm.loongarch.lasx.xvffint.s.l(<4 x i64> %va, <4 x i64> %vb) + ret <8 x float> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-flogb.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-flogb.ll new file mode 100644 index 0000000000000000000000000000000000000000..bccef4504d70e2a136d095be5e201c34fadb800f --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-flogb.ll @@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <8 x float> @llvm.loongarch.lasx.xvflogb.s(<8 x float>) + +define <8 x float> @lasx_xvflogb_s(<8 x float> %va) nounwind { +; CHECK-LABEL: lasx_xvflogb_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvflogb.s $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <8 x float> @llvm.loongarch.lasx.xvflogb.s(<8 x float> %va) + ret <8 x float> %res +} + +declare <4 x double> @llvm.loongarch.lasx.xvflogb.d(<4 x double>) + +define <4 x double> @lasx_xvflogb_d(<4 x double> %va) nounwind { +; CHECK-LABEL: lasx_xvflogb_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvflogb.d $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x double> @llvm.loongarch.lasx.xvflogb.d(<4 x double> %va) + ret <4 x double> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmadd.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmadd.ll new file mode 100644 index 0000000000000000000000000000000000000000..0fc06f97166028542e2dd590282f942654de295b --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmadd.ll @@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <8 x float> @llvm.loongarch.lasx.xvfmadd.s(<8 x float>, <8 x float>, <8 x float>) + +define <8 x float> @lasx_xvfmadd_s(<8 x float> %va, <8 x float> %vb, <8 x float> %vc) nounwind { +; CHECK-LABEL: lasx_xvfmadd_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfmadd.s $xr0, $xr0, $xr1, $xr2 +; CHECK-NEXT: ret +entry: + %res = call <8 x float> @llvm.loongarch.lasx.xvfmadd.s(<8 x float> %va, <8 x float> %vb, <8 x float> %vc) + ret <8 x float> %res +} + +declare <4 x double> @llvm.loongarch.lasx.xvfmadd.d(<4 x double>, <4 x double>, <4 x double>) + +define <4 x double> @lasx_xvfmadd_d(<4 x double> %va, <4 x double> %vb, <4 x double> %vc) nounwind { +; CHECK-LABEL: lasx_xvfmadd_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfmadd.d $xr0, $xr0, $xr1, $xr2 +; CHECK-NEXT: ret +entry: + %res = call <4 x double> @llvm.loongarch.lasx.xvfmadd.d(<4 x double> %va, <4 x double> %vb, <4 x double> %vc) + ret <4 x double> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmax.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmax.ll new file mode 100644 index 0000000000000000000000000000000000000000..2422fa0c00d8bfd71d7921d2b648561471dad12b --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmax.ll @@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <8 x float> @llvm.loongarch.lasx.xvfmax.s(<8 x float>, <8 x float>) + +define <8 x float> @lasx_xvfmax_s(<8 x float> %va, <8 x float> %vb) nounwind { +; CHECK-LABEL: lasx_xvfmax_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfmax.s $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x float> @llvm.loongarch.lasx.xvfmax.s(<8 x float> %va, <8 x float> %vb) + ret <8 x float> %res +} + +declare <4 x double> @llvm.loongarch.lasx.xvfmax.d(<4 x double>, <4 x double>) + +define <4 x double> @lasx_xvfmax_d(<4 x double> %va, <4 x double> %vb) nounwind { +; CHECK-LABEL: lasx_xvfmax_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfmax.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x double> @llvm.loongarch.lasx.xvfmax.d(<4 x double> %va, <4 x double> %vb) + ret <4 x double> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmaxa.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmaxa.ll new file mode 100644 index 0000000000000000000000000000000000000000..cd9ccc656aef668d2311702b0d8ed37ecba26b06 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmaxa.ll @@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <8 x float> @llvm.loongarch.lasx.xvfmaxa.s(<8 x float>, <8 x float>) + +define <8 x float> @lasx_xvfmaxa_s(<8 x float> %va, <8 x float> %vb) nounwind { +; CHECK-LABEL: lasx_xvfmaxa_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfmaxa.s $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x float> @llvm.loongarch.lasx.xvfmaxa.s(<8 x float> %va, <8 x float> %vb) + ret <8 x float> %res +} + +declare <4 x double> @llvm.loongarch.lasx.xvfmaxa.d(<4 x double>, <4 x double>) + +define <4 x double> @lasx_xvfmaxa_d(<4 x double> %va, <4 x double> %vb) nounwind { +; CHECK-LABEL: lasx_xvfmaxa_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfmaxa.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x double> @llvm.loongarch.lasx.xvfmaxa.d(<4 x double> %va, <4 x double> %vb) + ret <4 x double> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmin.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmin.ll new file mode 100644 index 0000000000000000000000000000000000000000..effb3f9e1d75a8ec52b1ffc2b229a98fccb06c73 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmin.ll @@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <8 x float> @llvm.loongarch.lasx.xvfmin.s(<8 x float>, <8 x float>) + +define <8 x float> @lasx_xvfmin_s(<8 x float> %va, <8 x float> %vb) nounwind { +; CHECK-LABEL: lasx_xvfmin_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfmin.s $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x float> @llvm.loongarch.lasx.xvfmin.s(<8 x float> %va, <8 x float> %vb) + ret <8 x float> %res +} + +declare <4 x double> @llvm.loongarch.lasx.xvfmin.d(<4 x double>, <4 x double>) + +define <4 x double> @lasx_xvfmin_d(<4 x double> %va, <4 x double> %vb) nounwind { +; CHECK-LABEL: lasx_xvfmin_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfmin.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x double> @llvm.loongarch.lasx.xvfmin.d(<4 x double> %va, <4 x double> %vb) + ret <4 x double> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmina.ll 
b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmina.ll new file mode 100644 index 0000000000000000000000000000000000000000..753a6f31ba061c7429644a83acabc8c1a6788e99 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmina.ll @@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <8 x float> @llvm.loongarch.lasx.xvfmina.s(<8 x float>, <8 x float>) + +define <8 x float> @lasx_xvfmina_s(<8 x float> %va, <8 x float> %vb) nounwind { +; CHECK-LABEL: lasx_xvfmina_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfmina.s $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x float> @llvm.loongarch.lasx.xvfmina.s(<8 x float> %va, <8 x float> %vb) + ret <8 x float> %res +} + +declare <4 x double> @llvm.loongarch.lasx.xvfmina.d(<4 x double>, <4 x double>) + +define <4 x double> @lasx_xvfmina_d(<4 x double> %va, <4 x double> %vb) nounwind { +; CHECK-LABEL: lasx_xvfmina_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfmina.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x double> @llvm.loongarch.lasx.xvfmina.d(<4 x double> %va, <4 x double> %vb) + ret <4 x double> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmsub.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmsub.ll new file mode 100644 index 0000000000000000000000000000000000000000..57909d0dd1689f692dee38dce3d646fdc9c7b59a --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmsub.ll @@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <8 x float> @llvm.loongarch.lasx.xvfmsub.s(<8 x float>, <8 x float>, <8 x float>) + +define <8 x float> @lasx_xvfmsub_s(<8 x float> %va, <8 x float> %vb, <8 x float> %vc) nounwind { +; CHECK-LABEL: lasx_xvfmsub_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfmsub.s $xr0, $xr0, $xr1, $xr2 +; CHECK-NEXT: ret +entry: + %res = call <8 x float> @llvm.loongarch.lasx.xvfmsub.s(<8 x float> %va, <8 x float> %vb, <8 x float> %vc) + ret <8 x float> %res +} + +declare <4 x double> @llvm.loongarch.lasx.xvfmsub.d(<4 x double>, <4 x double>, <4 x double>) + +define <4 x double> @lasx_xvfmsub_d(<4 x double> %va, <4 x double> %vb, <4 x double> %vc) nounwind { +; CHECK-LABEL: lasx_xvfmsub_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfmsub.d $xr0, $xr0, $xr1, $xr2 +; CHECK-NEXT: ret +entry: + %res = call <4 x double> @llvm.loongarch.lasx.xvfmsub.d(<4 x double> %va, <4 x double> %vb, <4 x double> %vc) + ret <4 x double> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmul.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmul.ll new file mode 100644 index 0000000000000000000000000000000000000000..9cad6f38306618c5f7c2199b05abbb37033051d1 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fmul.ll @@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <8 x float> @llvm.loongarch.lasx.xvfmul.s(<8 x float>, <8 x float>) + +define <8 x float> @lasx_xvfmul_s(<8 x float> %va, <8 x float> %vb) nounwind { +; CHECK-LABEL: lasx_xvfmul_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfmul.s $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x float> @llvm.loongarch.lasx.xvfmul.s(<8 x float> %va, <8 x float> %vb) + ret <8 x float> %res +} + +declare <4 x double> @llvm.loongarch.lasx.xvfmul.d(<4 x 
double>, <4 x double>) + +define <4 x double> @lasx_xvfmul_d(<4 x double> %va, <4 x double> %vb) nounwind { +; CHECK-LABEL: lasx_xvfmul_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfmul.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x double> @llvm.loongarch.lasx.xvfmul.d(<4 x double> %va, <4 x double> %vb) + ret <4 x double> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fnmadd.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fnmadd.ll new file mode 100644 index 0000000000000000000000000000000000000000..c30993590f98a89039d6741aeb3696b0856c6364 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fnmadd.ll @@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <8 x float> @llvm.loongarch.lasx.xvfnmadd.s(<8 x float>, <8 x float>, <8 x float>) + +define <8 x float> @lasx_xvfnmadd_s(<8 x float> %va, <8 x float> %vb, <8 x float> %vc) nounwind { +; CHECK-LABEL: lasx_xvfnmadd_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfnmadd.s $xr0, $xr0, $xr1, $xr2 +; CHECK-NEXT: ret +entry: + %res = call <8 x float> @llvm.loongarch.lasx.xvfnmadd.s(<8 x float> %va, <8 x float> %vb, <8 x float> %vc) + ret <8 x float> %res +} + +declare <4 x double> @llvm.loongarch.lasx.xvfnmadd.d(<4 x double>, <4 x double>, <4 x double>) + +define <4 x double> @lasx_xvfnmadd_d(<4 x double> %va, <4 x double> %vb, <4 x double> %vc) nounwind { +; CHECK-LABEL: lasx_xvfnmadd_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfnmadd.d $xr0, $xr0, $xr1, $xr2 +; CHECK-NEXT: ret +entry: + %res = call <4 x double> @llvm.loongarch.lasx.xvfnmadd.d(<4 x double> %va, <4 x double> %vb, <4 x double> %vc) + ret <4 x double> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fnmsub.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fnmsub.ll new file mode 100644 index 0000000000000000000000000000000000000000..2e7ca695be62567cdeabdadade4c39d731ffc68d --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fnmsub.ll @@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <8 x float> @llvm.loongarch.lasx.xvfnmsub.s(<8 x float>, <8 x float>, <8 x float>) + +define <8 x float> @lasx_xvfnmsub_s(<8 x float> %va, <8 x float> %vb, <8 x float> %vc) nounwind { +; CHECK-LABEL: lasx_xvfnmsub_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfnmsub.s $xr0, $xr0, $xr1, $xr2 +; CHECK-NEXT: ret +entry: + %res = call <8 x float> @llvm.loongarch.lasx.xvfnmsub.s(<8 x float> %va, <8 x float> %vb, <8 x float> %vc) + ret <8 x float> %res +} + +declare <4 x double> @llvm.loongarch.lasx.xvfnmsub.d(<4 x double>, <4 x double>, <4 x double>) + +define <4 x double> @lasx_xvfnmsub_d(<4 x double> %va, <4 x double> %vb, <4 x double> %vc) nounwind { +; CHECK-LABEL: lasx_xvfnmsub_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfnmsub.d $xr0, $xr0, $xr1, $xr2 +; CHECK-NEXT: ret +entry: + %res = call <4 x double> @llvm.loongarch.lasx.xvfnmsub.d(<4 x double> %va, <4 x double> %vb, <4 x double> %vc) + ret <4 x double> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frecip.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frecip.ll new file mode 100644 index 0000000000000000000000000000000000000000..da3a26df2824e871ebd018496cfaedf5c3df6965 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frecip.ll @@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <8 x float> @llvm.loongarch.lasx.xvfrecip.s(<8 x float>) + +define <8 x float> @lasx_xvfrecip_s(<8 x float> %va) nounwind { +; CHECK-LABEL: lasx_xvfrecip_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfrecip.s $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <8 x float> @llvm.loongarch.lasx.xvfrecip.s(<8 x float> %va) + ret <8 x float> %res +} + +declare <4 x double> @llvm.loongarch.lasx.xvfrecip.d(<4 x double>) + +define <4 x double> @lasx_xvfrecip_d(<4 x double> %va) nounwind { +; CHECK-LABEL: lasx_xvfrecip_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfrecip.d $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x double> @llvm.loongarch.lasx.xvfrecip.d(<4 x double> %va) + ret <4 x double> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frint.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frint.ll new file mode 100644 index 0000000000000000000000000000000000000000..ddead27cd14b5b01879f541a4d13d48288cf5aa2 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frint.ll @@ -0,0 +1,122 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <8 x float> @llvm.loongarch.lasx.xvfrintrne.s(<8 x float>) + +define <8 x float> @lasx_xvfrintrne_s(<8 x float> %va) nounwind { +; CHECK-LABEL: lasx_xvfrintrne_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfrintrne.s $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <8 x float> @llvm.loongarch.lasx.xvfrintrne.s(<8 x float> %va) + ret <8 x float> %res +} + +declare <4 x double> @llvm.loongarch.lasx.xvfrintrne.d(<4 x double>) + +define <4 x double> @lasx_xvfrintrne_d(<4 x double> %va) nounwind { +; CHECK-LABEL: lasx_xvfrintrne_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfrintrne.d $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x double> @llvm.loongarch.lasx.xvfrintrne.d(<4 x double> %va) + ret <4 x double> %res +} + +declare <8 x float> @llvm.loongarch.lasx.xvfrintrz.s(<8 x float>) + +define <8 x float> @lasx_xvfrintrz_s(<8 x float> %va) nounwind { +; CHECK-LABEL: lasx_xvfrintrz_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfrintrz.s $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <8 x float> @llvm.loongarch.lasx.xvfrintrz.s(<8 x float> %va) + ret <8 x float> %res +} + +declare <4 x double> @llvm.loongarch.lasx.xvfrintrz.d(<4 x double>) + +define <4 x double> @lasx_xvfrintrz_d(<4 x double> %va) nounwind { +; CHECK-LABEL: lasx_xvfrintrz_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfrintrz.d $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x double> @llvm.loongarch.lasx.xvfrintrz.d(<4 x double> %va) + ret <4 x double> %res +} + +declare <8 x float> @llvm.loongarch.lasx.xvfrintrp.s(<8 x float>) + +define <8 x float> @lasx_xvfrintrp_s(<8 x float> %va) nounwind { +; CHECK-LABEL: lasx_xvfrintrp_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfrintrp.s $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <8 x float> @llvm.loongarch.lasx.xvfrintrp.s(<8 x float> %va) + ret <8 x float> %res +} + +declare <4 x double> @llvm.loongarch.lasx.xvfrintrp.d(<4 x double>) + +define <4 x double> @lasx_xvfrintrp_d(<4 x double> %va) nounwind { +; CHECK-LABEL: lasx_xvfrintrp_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfrintrp.d $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x double> @llvm.loongarch.lasx.xvfrintrp.d(<4 x double> %va) + ret <4 x double> %res +} + +declare <8 x float> 
@llvm.loongarch.lasx.xvfrintrm.s(<8 x float>) + +define <8 x float> @lasx_xvfrintrm_s(<8 x float> %va) nounwind { +; CHECK-LABEL: lasx_xvfrintrm_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfrintrm.s $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <8 x float> @llvm.loongarch.lasx.xvfrintrm.s(<8 x float> %va) + ret <8 x float> %res +} + +declare <4 x double> @llvm.loongarch.lasx.xvfrintrm.d(<4 x double>) + +define <4 x double> @lasx_xvfrintrm_d(<4 x double> %va) nounwind { +; CHECK-LABEL: lasx_xvfrintrm_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfrintrm.d $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x double> @llvm.loongarch.lasx.xvfrintrm.d(<4 x double> %va) + ret <4 x double> %res +} + +declare <8 x float> @llvm.loongarch.lasx.xvfrint.s(<8 x float>) + +define <8 x float> @lasx_xvfrint_s(<8 x float> %va) nounwind { +; CHECK-LABEL: lasx_xvfrint_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfrint.s $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <8 x float> @llvm.loongarch.lasx.xvfrint.s(<8 x float> %va) + ret <8 x float> %res +} + +declare <4 x double> @llvm.loongarch.lasx.xvfrint.d(<4 x double>) + +define <4 x double> @lasx_xvfrint_d(<4 x double> %va) nounwind { +; CHECK-LABEL: lasx_xvfrint_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfrint.d $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x double> @llvm.loongarch.lasx.xvfrint.d(<4 x double> %va) + ret <4 x double> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frsqrt.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frsqrt.ll new file mode 100644 index 0000000000000000000000000000000000000000..6efa8122baf1804855516b559ddd5fd13f51e095 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frsqrt.ll @@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <8 x float> @llvm.loongarch.lasx.xvfrsqrt.s(<8 x float>) + +define <8 x float> @lasx_xvfrsqrt_s(<8 x float> %va) nounwind { +; CHECK-LABEL: lasx_xvfrsqrt_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfrsqrt.s $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <8 x float> @llvm.loongarch.lasx.xvfrsqrt.s(<8 x float> %va) + ret <8 x float> %res +} + +declare <4 x double> @llvm.loongarch.lasx.xvfrsqrt.d(<4 x double>) + +define <4 x double> @lasx_xvfrsqrt_d(<4 x double> %va) nounwind { +; CHECK-LABEL: lasx_xvfrsqrt_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfrsqrt.d $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x double> @llvm.loongarch.lasx.xvfrsqrt.d(<4 x double> %va) + ret <4 x double> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frstp-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frstp-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..64b4632669d29dc79bd38a0f8d273b5128e8db61 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frstp-invalid-imm.ll @@ -0,0 +1,33 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvfrstpi.b(<32 x i8>, <32 x i8>, i32) + +define <32 x i8> @lasx_xvfrstpi_b_lo(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvfrstpi.b: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvfrstpi.b(<32 x i8> %va, <32 x i8> %vb, i32 -1) + ret <32 x i8> %res +} + +define <32 x i8> @lasx_xvfrstpi_b_hi(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvfrstpi.b: argument out of 
range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvfrstpi.b(<32 x i8> %va, <32 x i8> %vb, i32 32) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvfrstpi.h(<16 x i16>, <16 x i16>, i32) + +define <16 x i16> @lasx_xvfrstpi_h_lo(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvfrstpi.h: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvfrstpi.h(<16 x i16> %va, <16 x i16> %vb, i32 -1) + ret <16 x i16> %res +} + +define <16 x i16> @lasx_xvfrstpi_h_hi(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvfrstpi.h: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvfrstpi.h(<16 x i16> %va, <16 x i16> %vb, i32 32) + ret <16 x i16> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frstp-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frstp-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..ca92cff9b2d1ecb76fc769254a9b55c7c3abc6d8 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frstp-non-imm.ll @@ -0,0 +1,19 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvfrstpi.b(<32 x i8>, <32 x i8>, i32) + +define <32 x i8> @lasx_xvfrstpi_b(<32 x i8> %va, <32 x i8> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvfrstpi.b(<32 x i8> %va, <32 x i8> %vb, i32 %c) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvfrstpi.h(<16 x i16>, <16 x i16>, i32) + +define <16 x i16> @lasx_xvfrstpi_h(<16 x i16> %va, <16 x i16> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvfrstpi.h(<16 x i16> %va, <16 x i16> %vb, i32 %c) + ret <16 x i16> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frstp.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frstp.ll new file mode 100644 index 0000000000000000000000000000000000000000..e83e55a52a113dbc03a476fcd6c8ef9b901aa063 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-frstp.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvfrstp.b(<32 x i8>, <32 x i8>, <32 x i8>) + +define <32 x i8> @lasx_xvfrstp_b(<32 x i8> %va, <32 x i8> %vb, <32 x i8> %vc) nounwind { +; CHECK-LABEL: lasx_xvfrstp_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfrstp.b $xr0, $xr1, $xr2 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvfrstp.b(<32 x i8> %va, <32 x i8> %vb, <32 x i8> %vc) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvfrstp.h(<16 x i16>, <16 x i16>, <16 x i16>) + +define <16 x i16> @lasx_xvfrstp_h(<16 x i16> %va, <16 x i16> %vb, <16 x i16> %vc) nounwind { +; CHECK-LABEL: lasx_xvfrstp_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfrstp.h $xr0, $xr1, $xr2 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvfrstp.h(<16 x i16> %va, <16 x i16> %vb, <16 x i16> %vc) + ret <16 x i16> %res +} + +declare <32 x i8> @llvm.loongarch.lasx.xvfrstpi.b(<32 x i8>, <32 x i8>, i32) + +define <32 x i8> @lasx_xvfrstpi_b(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvfrstpi_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfrstpi.b $xr0, $xr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> 
@llvm.loongarch.lasx.xvfrstpi.b(<32 x i8> %va, <32 x i8> %vb, i32 1) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvfrstpi.h(<16 x i16>, <16 x i16>, i32) + +define <16 x i16> @lasx_xvfrstpi_h(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvfrstpi_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfrstpi.h $xr0, $xr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvfrstpi.h(<16 x i16> %va, <16 x i16> %vb, i32 1) + ret <16 x i16> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fsqrt.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fsqrt.ll new file mode 100644 index 0000000000000000000000000000000000000000..a13333d8d81c29afe7f86417e9b2cb9e30a13059 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fsqrt.ll @@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <8 x float> @llvm.loongarch.lasx.xvfsqrt.s(<8 x float>) + +define <8 x float> @lasx_xvfsqrt_s(<8 x float> %va) nounwind { +; CHECK-LABEL: lasx_xvfsqrt_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfsqrt.s $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <8 x float> @llvm.loongarch.lasx.xvfsqrt.s(<8 x float> %va) + ret <8 x float> %res +} + +declare <4 x double> @llvm.loongarch.lasx.xvfsqrt.d(<4 x double>) + +define <4 x double> @lasx_xvfsqrt_d(<4 x double> %va) nounwind { +; CHECK-LABEL: lasx_xvfsqrt_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfsqrt.d $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x double> @llvm.loongarch.lasx.xvfsqrt.d(<4 x double> %va) + ret <4 x double> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fsub.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fsub.ll new file mode 100644 index 0000000000000000000000000000000000000000..b52774a03618ddbaff23857476eafac71a9e8b8e --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-fsub.ll @@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <8 x float> @llvm.loongarch.lasx.xvfsub.s(<8 x float>, <8 x float>) + +define <8 x float> @lasx_xvfsub_s(<8 x float> %va, <8 x float> %vb) nounwind { +; CHECK-LABEL: lasx_xvfsub_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfsub.s $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x float> @llvm.loongarch.lasx.xvfsub.s(<8 x float> %va, <8 x float> %vb) + ret <8 x float> %res +} + +declare <4 x double> @llvm.loongarch.lasx.xvfsub.d(<4 x double>, <4 x double>) + +define <4 x double> @lasx_xvfsub_d(<4 x double> %va, <4 x double> %vb) nounwind { +; CHECK-LABEL: lasx_xvfsub_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvfsub.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x double> @llvm.loongarch.lasx.xvfsub.d(<4 x double> %va, <4 x double> %vb) + ret <4 x double> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ftint.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ftint.ll new file mode 100644 index 0000000000000000000000000000000000000000..74cd507f16d2631fbb3d3a67842c50f081c32025 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ftint.ll @@ -0,0 +1,350 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.s(<8 x float>) + +define <8 x i32> @lasx_xvftintrne_w_s(<8 x 
float> %va) nounwind { +; CHECK-LABEL: lasx_xvftintrne_w_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvftintrne.w.s $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.s(<8 x float> %va) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvftintrne.l.d(<4 x double>) + +define <4 x i64> @lasx_xvftintrne_l_d(<4 x double> %va) nounwind { +; CHECK-LABEL: lasx_xvftintrne_l_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvftintrne.l.d $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvftintrne.l.d(<4 x double> %va) + ret <4 x i64> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.s(<8 x float>) + +define <8 x i32> @lasx_xvftintrz_w_s(<8 x float> %va) nounwind { +; CHECK-LABEL: lasx_xvftintrz_w_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvftintrz.w.s $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.s(<8 x float> %va) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvftintrz.l.d(<4 x double>) + +define <4 x i64> @lasx_xvftintrz_l_d(<4 x double> %va) nounwind { +; CHECK-LABEL: lasx_xvftintrz_l_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvftintrz.l.d $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvftintrz.l.d(<4 x double> %va) + ret <4 x i64> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.s(<8 x float>) + +define <8 x i32> @lasx_xvftintrp_w_s(<8 x float> %va) nounwind { +; CHECK-LABEL: lasx_xvftintrp_w_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvftintrp.w.s $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.s(<8 x float> %va) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvftintrp.l.d(<4 x double>) + +define <4 x i64> @lasx_xvftintrp_l_d(<4 x double> %va) nounwind { +; CHECK-LABEL: lasx_xvftintrp_l_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvftintrp.l.d $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvftintrp.l.d(<4 x double> %va) + ret <4 x i64> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.s(<8 x float>) + +define <8 x i32> @lasx_xvftintrm_w_s(<8 x float> %va) nounwind { +; CHECK-LABEL: lasx_xvftintrm_w_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvftintrm.w.s $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.s(<8 x float> %va) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvftintrm.l.d(<4 x double>) + +define <4 x i64> @lasx_xvftintrm_l_d(<4 x double> %va) nounwind { +; CHECK-LABEL: lasx_xvftintrm_l_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvftintrm.l.d $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvftintrm.l.d(<4 x double> %va) + ret <4 x i64> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvftint.w.s(<8 x float>) + +define <8 x i32> @lasx_xvftint_w_s(<8 x float> %va) nounwind { +; CHECK-LABEL: lasx_xvftint_w_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvftint.w.s $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvftint.w.s(<8 x float> %va) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvftint.l.d(<4 x double>) + +define <4 x i64> @lasx_xvftint_l_d(<4 x double> %va) nounwind { +; CHECK-LABEL: lasx_xvftint_l_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvftint.l.d $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvftint.l.d(<4 x double> %va) + ret <4 
x i64> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvftintrz.wu.s(<8 x float>) + +define <8 x i32> @lasx_xvftintrz_wu_s(<8 x float> %va) nounwind { +; CHECK-LABEL: lasx_xvftintrz_wu_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvftintrz.wu.s $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvftintrz.wu.s(<8 x float> %va) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvftintrz.lu.d(<4 x double>) + +define <4 x i64> @lasx_xvftintrz_lu_d(<4 x double> %va) nounwind { +; CHECK-LABEL: lasx_xvftintrz_lu_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvftintrz.lu.d $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvftintrz.lu.d(<4 x double> %va) + ret <4 x i64> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvftint.wu.s(<8 x float>) + +define <8 x i32> @lasx_xvftint_wu_s(<8 x float> %va) nounwind { +; CHECK-LABEL: lasx_xvftint_wu_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvftint.wu.s $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvftint.wu.s(<8 x float> %va) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvftint.lu.d(<4 x double>) + +define <4 x i64> @lasx_xvftint_lu_d(<4 x double> %va) nounwind { +; CHECK-LABEL: lasx_xvftint_lu_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvftint.lu.d $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvftint.lu.d(<4 x double> %va) + ret <4 x i64> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.d(<4 x double>, <4 x double>) + +define <8 x i32> @lasx_xvftintrne_w_d(<4 x double> %va, <4 x double> %vb) nounwind { +; CHECK-LABEL: lasx_xvftintrne_w_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvftintrne.w.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.d(<4 x double> %va, <4 x double> %vb) + ret <8 x i32> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.d(<4 x double>, <4 x double>) + +define <8 x i32> @lasx_xvftintrz_w_d(<4 x double> %va, <4 x double> %vb) nounwind { +; CHECK-LABEL: lasx_xvftintrz_w_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvftintrz.w.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.d(<4 x double> %va, <4 x double> %vb) + ret <8 x i32> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.d(<4 x double>, <4 x double>) + +define <8 x i32> @lasx_xvftintrp_w_d(<4 x double> %va, <4 x double> %vb) nounwind { +; CHECK-LABEL: lasx_xvftintrp_w_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvftintrp.w.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.d(<4 x double> %va, <4 x double> %vb) + ret <8 x i32> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.d(<4 x double>, <4 x double>) + +define <8 x i32> @lasx_xvftintrm_w_d(<4 x double> %va, <4 x double> %vb) nounwind { +; CHECK-LABEL: lasx_xvftintrm_w_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvftintrm.w.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.d(<4 x double> %va, <4 x double> %vb) + ret <8 x i32> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvftint.w.d(<4 x double>, <4 x double>) + +define <8 x i32> @lasx_xvftint_w_d(<4 x double> %va, <4 x double> %vb) nounwind { +; CHECK-LABEL: lasx_xvftint_w_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvftint.w.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> 
@llvm.loongarch.lasx.xvftint.w.d(<4 x double> %va, <4 x double> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvftintrnel.l.s(<8 x float>) + +define <4 x i64> @lasx_xvftintrnel_l_s(<8 x float> %va) nounwind { +; CHECK-LABEL: lasx_xvftintrnel_l_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvftintrnel.l.s $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvftintrnel.l.s(<8 x float> %va) + ret <4 x i64> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvftintrneh.l.s(<8 x float>) + +define <4 x i64> @lasx_xvftintrneh_l_s(<8 x float> %va) nounwind { +; CHECK-LABEL: lasx_xvftintrneh_l_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvftintrneh.l.s $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvftintrneh.l.s(<8 x float> %va) + ret <4 x i64> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvftintrzl.l.s(<8 x float>) + +define <4 x i64> @lasx_xvftintrzl_l_s(<8 x float> %va) nounwind { +; CHECK-LABEL: lasx_xvftintrzl_l_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvftintrzl.l.s $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvftintrzl.l.s(<8 x float> %va) + ret <4 x i64> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvftintrzh.l.s(<8 x float>) + +define <4 x i64> @lasx_xvftintrzh_l_s(<8 x float> %va) nounwind { +; CHECK-LABEL: lasx_xvftintrzh_l_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvftintrzh.l.s $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvftintrzh.l.s(<8 x float> %va) + ret <4 x i64> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvftintrpl.l.s(<8 x float>) + +define <4 x i64> @lasx_xvftintrpl_l_s(<8 x float> %va) nounwind { +; CHECK-LABEL: lasx_xvftintrpl_l_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvftintrpl.l.s $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvftintrpl.l.s(<8 x float> %va) + ret <4 x i64> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvftintrph.l.s(<8 x float>) + +define <4 x i64> @lasx_xvftintrph_l_s(<8 x float> %va) nounwind { +; CHECK-LABEL: lasx_xvftintrph_l_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvftintrph.l.s $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvftintrph.l.s(<8 x float> %va) + ret <4 x i64> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvftintrml.l.s(<8 x float>) + +define <4 x i64> @lasx_xvftintrml_l_s(<8 x float> %va) nounwind { +; CHECK-LABEL: lasx_xvftintrml_l_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvftintrml.l.s $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvftintrml.l.s(<8 x float> %va) + ret <4 x i64> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvftintrmh.l.s(<8 x float>) + +define <4 x i64> @lasx_xvftintrmh_l_s(<8 x float> %va) nounwind { +; CHECK-LABEL: lasx_xvftintrmh_l_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvftintrmh.l.s $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvftintrmh.l.s(<8 x float> %va) + ret <4 x i64> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvftintl.l.s(<8 x float>) + +define <4 x i64> @lasx_xvftintl_l_s(<8 x float> %va) nounwind { +; CHECK-LABEL: lasx_xvftintl_l_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvftintl.l.s $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvftintl.l.s(<8 x float> %va) + ret <4 x i64> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvftinth.l.s(<8 x float>) + +define <4 x i64> 
@lasx_xvftinth_l_s(<8 x float> %va) nounwind { +; CHECK-LABEL: lasx_xvftinth_l_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvftinth.l.s $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvftinth.l.s(<8 x float> %va) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-haddw.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-haddw.ll new file mode 100644 index 0000000000000000000000000000000000000000..2c64ab23806b5c8c160fbfb4221b666b9390e138 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-haddw.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <16 x i16> @llvm.loongarch.lasx.xvhaddw.h.b(<32 x i8>, <32 x i8>) + +define <16 x i16> @lasx_xvhaddw_h_b(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvhaddw_h_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvhaddw.h.b $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvhaddw.h.b(<32 x i8> %va, <32 x i8> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvhaddw.w.h(<16 x i16>, <16 x i16>) + +define <8 x i32> @lasx_xvhaddw_w_h(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvhaddw_w_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvhaddw.w.h $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvhaddw.w.h(<16 x i16> %va, <16 x i16> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvhaddw.d.w(<8 x i32>, <8 x i32>) + +define <4 x i64> @lasx_xvhaddw_d_w(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvhaddw_d_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvhaddw.d.w $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvhaddw.d.w(<8 x i32> %va, <8 x i32> %vb) + ret <4 x i64> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvhaddw.q.d(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvhaddw_q_d(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvhaddw_q_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvhaddw.q.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvhaddw.q.d(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvhaddw.hu.bu(<32 x i8>, <32 x i8>) + +define <16 x i16> @lasx_xvhaddw_hu_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvhaddw_hu_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvhaddw.hu.bu $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvhaddw.hu.bu(<32 x i8> %va, <32 x i8> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvhaddw.wu.hu(<16 x i16>, <16 x i16>) + +define <8 x i32> @lasx_xvhaddw_wu_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvhaddw_wu_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvhaddw.wu.hu $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvhaddw.wu.hu(<16 x i16> %va, <16 x i16> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvhaddw.du.wu(<8 x i32>, <8 x i32>) + +define <4 x i64> @lasx_xvhaddw_du_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvhaddw_du_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvhaddw.du.wu $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvhaddw.du.wu(<8 x i32> %va, <8 x i32> 
%vb) + ret <4 x i64> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvhaddw.qu.du(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvhaddw_qu_du(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvhaddw_qu_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvhaddw.qu.du $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvhaddw.qu.du(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-hsubw.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-hsubw.ll new file mode 100644 index 0000000000000000000000000000000000000000..a5223c1d89a04413a0f7c5fa852db8df6dfe1080 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-hsubw.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <16 x i16> @llvm.loongarch.lasx.xvhsubw.h.b(<32 x i8>, <32 x i8>) + +define <16 x i16> @lasx_xvhsubw_h_b(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvhsubw_h_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvhsubw.h.b $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvhsubw.h.b(<32 x i8> %va, <32 x i8> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvhsubw.w.h(<16 x i16>, <16 x i16>) + +define <8 x i32> @lasx_xvhsubw_w_h(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvhsubw_w_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvhsubw.w.h $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvhsubw.w.h(<16 x i16> %va, <16 x i16> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvhsubw.d.w(<8 x i32>, <8 x i32>) + +define <4 x i64> @lasx_xvhsubw_d_w(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvhsubw_d_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvhsubw.d.w $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvhsubw.d.w(<8 x i32> %va, <8 x i32> %vb) + ret <4 x i64> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvhsubw.q.d(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvhsubw_q_d(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvhsubw_q_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvhsubw.q.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvhsubw.q.d(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvhsubw.hu.bu(<32 x i8>, <32 x i8>) + +define <16 x i16> @lasx_xvhsubw_hu_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvhsubw_hu_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvhsubw.hu.bu $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvhsubw.hu.bu(<32 x i8> %va, <32 x i8> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvhsubw.wu.hu(<16 x i16>, <16 x i16>) + +define <8 x i32> @lasx_xvhsubw_wu_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvhsubw_wu_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvhsubw.wu.hu $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvhsubw.wu.hu(<16 x i16> %va, <16 x i16> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvhsubw.du.wu(<8 x i32>, <8 x i32>) + +define <4 x i64> @lasx_xvhsubw_du_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvhsubw_du_wu: +; CHECK: # %bb.0: # 
%entry +; CHECK-NEXT: xvhsubw.du.wu $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvhsubw.du.wu(<8 x i32> %va, <8 x i32> %vb) + ret <4 x i64> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvhsubw.qu.du(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvhsubw_qu_du(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvhsubw_qu_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvhsubw.qu.du $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvhsubw.qu.du(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ilv.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ilv.ll new file mode 100644 index 0000000000000000000000000000000000000000..c9d0ca6b0324a205f56721ec4b373e6f07360279 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ilv.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvilvl.b(<32 x i8>, <32 x i8>) + +define <32 x i8> @lasx_xvilvl_b(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvilvl_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvilvl.b $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvilvl.b(<32 x i8> %va, <32 x i8> %vb) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvilvl.h(<16 x i16>, <16 x i16>) + +define <16 x i16> @lasx_xvilvl_h(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvilvl_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvilvl.h $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvilvl.h(<16 x i16> %va, <16 x i16> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvilvl.w(<8 x i32>, <8 x i32>) + +define <8 x i32> @lasx_xvilvl_w(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvilvl_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvilvl.w $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvilvl.w(<8 x i32> %va, <8 x i32> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvilvl.d(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvilvl_d(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvilvl_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvilvl.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvilvl.d(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} + +declare <32 x i8> @llvm.loongarch.lasx.xvilvh.b(<32 x i8>, <32 x i8>) + +define <32 x i8> @lasx_xvilvh_b(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvilvh_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvilvh.b $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvilvh.b(<32 x i8> %va, <32 x i8> %vb) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvilvh.h(<16 x i16>, <16 x i16>) + +define <16 x i16> @lasx_xvilvh_h(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvilvh_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvilvh.h $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvilvh.h(<16 x i16> %va, <16 x i16> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvilvh.w(<8 x i32>, <8 x i32>) + +define <8 x i32> @lasx_xvilvh_w(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: 
lasx_xvilvh_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvilvh.w $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvilvh.w(<8 x i32> %va, <8 x i32> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvilvh.d(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvilvh_d(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvilvh_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvilvh.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvilvh.d(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insgr2vr-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insgr2vr-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..4982f2c7d43a9237568513d262c0e569a949ac21 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insgr2vr-invalid-imm.ll @@ -0,0 +1,33 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <8 x i32> @llvm.loongarch.lasx.xvinsgr2vr.w(<8 x i32>, i32, i32) + +define <8 x i32> @lasx_xvinsgr2vr_w_lo(<8 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvinsgr2vr.w: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvinsgr2vr.w(<8 x i32> %va, i32 1, i32 -1) + ret <8 x i32> %res +} + +define <8 x i32> @lasx_xvinsgr2vr_w_hi(<8 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvinsgr2vr.w: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvinsgr2vr.w(<8 x i32> %va, i32 1, i32 8) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvinsgr2vr.d(<4 x i64>, i64, i32) + +define <4 x i64> @lasx_xvinsgr2vr_d_lo(<4 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvinsgr2vr.d: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvinsgr2vr.d(<4 x i64> %va, i64 1, i32 -1) + ret <4 x i64> %res +} + +define <4 x i64> @lasx_xvinsgr2vr_d_hi(<4 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvinsgr2vr.d: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvinsgr2vr.d(<4 x i64> %va, i64 1, i32 4) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insgr2vr-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insgr2vr-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..3accabf6dbd9892c8576193969e196ebea29be12 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insgr2vr-non-imm.ll @@ -0,0 +1,19 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <8 x i32> @llvm.loongarch.lasx.xvinsgr2vr.w(<8 x i32>, i32, i32) + +define <8 x i32> @lasx_xvinsgr2vr_w(<8 x i32> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvinsgr2vr.w(<8 x i32> %va, i32 1, i32 %b) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvinsgr2vr.d(<4 x i64>, i64, i32) + +define <4 x i64> @lasx_xvinsgr2vr_d(<4 x i64> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvinsgr2vr.d(<4 x i64> %va, i64 1, i32 %b) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insgr2vr.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insgr2vr.ll new file mode 100644 index 0000000000000000000000000000000000000000..ea98c96464aed825b02547924c89977ed123a7c5 --- /dev/null +++ 
b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insgr2vr.ll @@ -0,0 +1,28 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <8 x i32> @llvm.loongarch.lasx.xvinsgr2vr.w(<8 x i32>, i32, i32) + +define <8 x i32> @lasx_xvinsgr2vr_w(<8 x i32> %va) nounwind { +; CHECK-LABEL: lasx_xvinsgr2vr_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: ori $a0, $zero, 1 +; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvinsgr2vr.w(<8 x i32> %va, i32 1, i32 1) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvinsgr2vr.d(<4 x i64>, i64, i32) + +define <4 x i64> @lasx_xvinsgr2vr_d(<4 x i64> %va) nounwind { +; CHECK-LABEL: lasx_xvinsgr2vr_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: ori $a0, $zero, 1 +; CHECK-NEXT: xvinsgr2vr.d $xr0, $a0, 1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvinsgr2vr.d(<4 x i64> %va, i64 1, i32 1) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insve0-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insve0-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..a54fa8515fbafe91a9d338467481e13304db4c7f --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insve0-invalid-imm.ll @@ -0,0 +1,33 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <8 x i32> @llvm.loongarch.lasx.xvinsve0.w(<8 x i32>, <8 x i32>, i32) + +define <8 x i32> @lasx_xvinsve0_w_lo(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvinsve0.w: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvinsve0.w(<8 x i32> %va, <8 x i32> %vb, i32 -1) + ret <8 x i32> %res +} + +define <8 x i32> @lasx_xvinsve0_w_hi(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvinsve0.w: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvinsve0.w(<8 x i32> %va, <8 x i32> %vb, i32 8) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvinsve0.d(<4 x i64>, <4 x i64>, i32) + +define <4 x i64> @lasx_xvinsve0_d_lo(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvinsve0.d: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvinsve0.d(<4 x i64> %va, <4 x i64> %vb, i32 -1) + ret <4 x i64> %res +} + +define <4 x i64> @lasx_xvinsve0_d_hi(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvinsve0.d: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvinsve0.d(<4 x i64> %va, <4 x i64> %vb, i32 4) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insve0-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insve0-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..53e59db11aa69108e4e7279c1d4963a02e2bb521 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insve0-non-imm.ll @@ -0,0 +1,19 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <8 x i32> @llvm.loongarch.lasx.xvinsve0.w(<8 x i32>, <8 x i32>, i32) + +define <8 x i32> @lasx_xvinsve0_w(<8 x i32> %va, <8 x i32> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvinsve0.w(<8 x i32> %va, <8 x i32> %vb, i32 %c) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvinsve0.d(<4 x 
i64>, <4 x i64>, i32) + +define <4 x i64> @lasx_xvinsve0_d(<4 x i64> %va, <4 x i64> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvinsve0.d(<4 x i64> %va, <4 x i64> %vb, i32 %c) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insve0.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insve0.ll new file mode 100644 index 0000000000000000000000000000000000000000..27ae819c4144c573088aa52c31331b0cf15da634 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-insve0.ll @@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <8 x i32> @llvm.loongarch.lasx.xvinsve0.w(<8 x i32>, <8 x i32>, i32) + +define <8 x i32> @lasx_xvinsve0_w(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvinsve0_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvinsve0.w $xr0, $xr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvinsve0.w(<8 x i32> %va, <8 x i32> %vb, i32 1) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvinsve0.d(<4 x i64>, <4 x i64>, i32) + +define <4 x i64> @lasx_xvinsve0_d(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvinsve0_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvinsve0.d $xr0, $xr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvinsve0.d(<4 x i64> %va, <4 x i64> %vb, i32 1) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ld-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ld-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..20dd8a45d7f02ec3e954b87aece413aa5cbc78a9 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ld-invalid-imm.ll @@ -0,0 +1,17 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvld(i8*, i32) + +define <32 x i8> @lasx_xvld_lo(i8* %p) nounwind { +; CHECK: llvm.loongarch.lasx.xvld: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvld(i8* %p, i32 -2049) + ret <32 x i8> %res +} + +define <32 x i8> @lasx_xvld_hi(i8* %p) nounwind { +; CHECK: llvm.loongarch.lasx.xvld: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvld(i8* %p, i32 2048) + ret <32 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ld-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ld-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..b23436a4483235e7e14cf33a7a2acdedd4b06cfc --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ld-non-imm.ll @@ -0,0 +1,10 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvld(i8*, i32) + +define <32 x i8> @lasx_xvld(i8* %p, i32 %a) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvld(i8* %p, i32 %a) + ret <32 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ld.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ld.ll new file mode 100644 index 0000000000000000000000000000000000000000..5ffc629db4668bf1409b73e486ce5287e471d62f --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ld.ll @@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc 
--mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvld(i8*, i32) + +define <32 x i8> @lasx_xvld(i8* %p) nounwind { +; CHECK-LABEL: lasx_xvld: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a0, 1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvld(i8* %p, i32 1) + ret <32 x i8> %res +} + +declare <32 x i8> @llvm.loongarch.lasx.xvldx(i8*, i64) + +define <32 x i8> @lasx_xvldx(i8* %p, i64 %b) nounwind { +; CHECK-LABEL: lasx_xvldx: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvldx $xr0, $a0, $a1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvldx(i8* %p, i64 %b) + ret <32 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldi-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldi-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..f3dd3650cf8a409b533e900b26ad940708a9fc35 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldi-invalid-imm.ll @@ -0,0 +1,81 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <4 x i64> @llvm.loongarch.lasx.xvldi(i32) + +define <4 x i64> @lasx_xvldi_lo() nounwind { +; CHECK: llvm.loongarch.lasx.xvldi: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvldi(i32 -4097) + ret <4 x i64> %res +} + +define <4 x i64> @lasx_xvldi_hi() nounwind { +; CHECK: llvm.loongarch.lasx.xvldi: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvldi(i32 4096) + ret <4 x i64> %res +} + +declare <32 x i8> @llvm.loongarch.lasx.xvrepli.b(i32) + +define <32 x i8> @lasx_xvrepli_b_lo() nounwind { +; CHECK: llvm.loongarch.lasx.xvrepli.b: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvrepli.b(i32 -513) + ret <32 x i8> %res +} + +define <32 x i8> @lasx_xvrepli_b_hi() nounwind { +; CHECK: llvm.loongarch.lasx.xvrepli.b: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvrepli.b(i32 512) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvrepli.h(i32) + +define <16 x i16> @lasx_xvrepli_h_lo() nounwind { +; CHECK: llvm.loongarch.lasx.xvrepli.h: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvrepli.h(i32 -513) + ret <16 x i16> %res +} + +define <16 x i16> @lasx_xvrepli_h_hi() nounwind { +; CHECK: llvm.loongarch.lasx.xvrepli.h: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvrepli.h(i32 512) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvrepli.w(i32) + +define <8 x i32> @lasx_xvrepli_w_lo() nounwind { +; CHECK: llvm.loongarch.lasx.xvrepli.w: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvrepli.w(i32 -513) + ret <8 x i32> %res +} + +define <8 x i32> @lasx_xvrepli_w_hi() nounwind { +; CHECK: llvm.loongarch.lasx.xvrepli.w: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvrepli.w(i32 512) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvrepli.d(i32) + +define <4 x i64> @lasx_xvrepli_d_lo() nounwind { +; CHECK: llvm.loongarch.lasx.xvrepli.d: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvrepli.d(i32 -513) + ret <4 x i64> %res +} + +define <4 x i64> @lasx_xvrepli_d_hi() nounwind { +; CHECK: llvm.loongarch.lasx.xvrepli.d: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvrepli.d(i32 512) + ret <4 x i64> %res +} diff --git 
a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldi-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldi-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..6466818bf674b3976bd0931de9d356dabd333a97 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldi-non-imm.ll @@ -0,0 +1,46 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <4 x i64> @llvm.loongarch.lasx.xvldi(i32) + +define <4 x i64> @lasx_xvldi(i32 %a) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvldi(i32 %a) + ret <4 x i64> %res +} + +declare <32 x i8> @llvm.loongarch.lasx.xvrepli.b(i32) + +define <32 x i8> @lasx_xvrepli_b(i32 %a) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvrepli.b(i32 %a) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvrepli.h(i32) + +define <16 x i16> @lasx_xvrepli_h(i32 %a) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvrepli.h(i32 %a) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvrepli.w(i32) + +define <8 x i32> @lasx_xvrepli_w(i32 %a) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvrepli.w(i32 %a) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvrepli.d(i32) + +define <4 x i64> @lasx_xvrepli_d(i32 %a) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvrepli.d(i32 %a) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldi.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldi.ll new file mode 100644 index 0000000000000000000000000000000000000000..59f79dd32af367f56f7a1930d51e2b175505d32b --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldi.ll @@ -0,0 +1,62 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <4 x i64> @llvm.loongarch.lasx.xvldi(i32) + +define <4 x i64> @lasx_xvldi() nounwind { +; CHECK-LABEL: lasx_xvldi: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvldi $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvldi(i32 1) + ret <4 x i64> %res +} + +declare <32 x i8> @llvm.loongarch.lasx.xvrepli.b(i32) + +define <32 x i8> @lasx_xvrepli_b() nounwind { +; CHECK-LABEL: lasx_xvrepli_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvrepli.b $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvrepli.b(i32 1) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvrepli.h(i32) + +define <16 x i16> @lasx_xvrepli_h() nounwind { +; CHECK-LABEL: lasx_xvrepli_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvrepli.h $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvrepli.h(i32 1) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvrepli.w(i32) + +define <8 x i32> @lasx_xvrepli_w() nounwind { +; CHECK-LABEL: lasx_xvrepli_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvrepli.w $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvrepli.w(i32 1) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvrepli.d(i32) + +define <4 x i64> @lasx_xvrepli_d() nounwind { +; CHECK-LABEL: lasx_xvrepli_d: +; CHECK: # %bb.0: # %entry +; 
CHECK-NEXT: xvrepli.d $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvrepli.d(i32 1) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldrepl-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldrepl-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..cb62a839985a32c2b3ac0228253fe6af4187166b --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldrepl-invalid-imm.ll @@ -0,0 +1,65 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvldrepl.b(i8*, i32) + +define <32 x i8> @lasx_xvldrepl_b_lo(i8* %p) nounwind { +; CHECK: llvm.loongarch.lasx.xvldrepl.b: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvldrepl.b(i8* %p, i32 -2049) + ret <32 x i8> %res +} + +define <32 x i8> @lasx_xvldrepl_b_hi(i8* %p) nounwind { +; CHECK: llvm.loongarch.lasx.xvldrepl.b: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvldrepl.b(i8* %p, i32 2048) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvldrepl.h(i8*, i32) + +define <16 x i16> @lasx_xvldrepl_h_lo(i8* %p) nounwind { +; CHECK: llvm.loongarch.lasx.xvldrepl.h: argument out of range or not a multiple of 2. +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvldrepl.h(i8* %p, i32 -2050) + ret <16 x i16> %res +} + +define <16 x i16> @lasx_xvldrepl_h_hi(i8* %p) nounwind { +; CHECK: llvm.loongarch.lasx.xvldrepl.h: argument out of range or not a multiple of 2. +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvldrepl.h(i8* %p, i32 2048) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvldrepl.w(i8*, i32) + +define <8 x i32> @lasx_xvldrepl_w_lo(i8* %p) nounwind { +; CHECK: llvm.loongarch.lasx.xvldrepl.w: argument out of range or not a multiple of 4. +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvldrepl.w(i8* %p, i32 -2052) + ret <8 x i32> %res +} + +define <8 x i32> @lasx_xvldrepl_w_hi(i8* %p) nounwind { +; CHECK: llvm.loongarch.lasx.xvldrepl.w: argument out of range or not a multiple of 4. +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvldrepl.w(i8* %p, i32 2048) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvldrepl.d(i8*, i32) + +define <4 x i64> @lasx_xvldrepl_d_lo(i8* %p) nounwind { +; CHECK: llvm.loongarch.lasx.xvldrepl.d: argument out of range or not a multiple of 8. +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvldrepl.d(i8* %p, i32 -2056) + ret <4 x i64> %res +} + +define <4 x i64> @lasx_xvldrepl_d_hi(i8* %p) nounwind { +; CHECK: llvm.loongarch.lasx.xvldrepl.d: argument out of range or not a multiple of 8. 
+entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvldrepl.d(i8* %p, i32 2048) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldrepl-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldrepl-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..075d663b0dd7af52bab80a2c9466a9ef8340322a --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldrepl-non-imm.ll @@ -0,0 +1,37 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvldrepl.b(i8*, i32) + +define <32 x i8> @lasx_xvldrepl_b(i8* %p, i32 %a) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvldrepl.b(i8* %p, i32 %a) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvldrepl.h(i8*, i32) + +define <16 x i16> @lasx_xvldrepl_h(i8* %p, i32 %a) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvldrepl.h(i8* %p, i32 %a) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvldrepl.w(i8*, i32) + +define <8 x i32> @lasx_xvldrepl_w(i8* %p, i32 %a) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvldrepl.w(i8* %p, i32 %a) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvldrepl.d(i8*, i32) + +define <4 x i64> @lasx_xvldrepl_d(i8* %p, i32 %a) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvldrepl.d(i8* %p, i32 %a) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldrepl.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldrepl.ll new file mode 100644 index 0000000000000000000000000000000000000000..ae6abdf81cbc58dc93fb88c3300cef8d36819d34 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ldrepl.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvldrepl.b(i8*, i32) + +define <32 x i8> @lasx_xvldrepl_b(i8* %p) nounwind { +; CHECK-LABEL: lasx_xvldrepl_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvldrepl.b $xr0, $a0, 1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvldrepl.b(i8* %p, i32 1) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvldrepl.h(i8*, i32) + +define <16 x i16> @lasx_xvldrepl_h(i8* %p) nounwind { +; CHECK-LABEL: lasx_xvldrepl_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvldrepl.h $xr0, $a0, 2 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvldrepl.h(i8* %p, i32 2) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvldrepl.w(i8*, i32) + +define <8 x i32> @lasx_xvldrepl_w(i8* %p) nounwind { +; CHECK-LABEL: lasx_xvldrepl_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvldrepl.w $xr0, $a0, 4 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvldrepl.w(i8* %p, i32 4) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvldrepl.d(i8*, i32) + +define <4 x i64> @lasx_xvldrepl_d(i8* %p) nounwind { +; CHECK-LABEL: lasx_xvldrepl_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvldrepl.d $xr0, $a0, 8 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvldrepl.d(i8* %p, i32 8) + ret <4 x i64> %res +} diff --git 
a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-madd.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-madd.ll new file mode 100644 index 0000000000000000000000000000000000000000..d3b09396727e7919007f091034f21fdeed3ff798 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-madd.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvmadd.b(<32 x i8>, <32 x i8>, <32 x i8>) + +define <32 x i8> @lasx_xvmadd_b(<32 x i8> %va, <32 x i8> %vb, <32 x i8> %vc) nounwind { +; CHECK-LABEL: lasx_xvmadd_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmadd.b $xr0, $xr1, $xr2 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvmadd.b(<32 x i8> %va, <32 x i8> %vb, <32 x i8> %vc) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvmadd.h(<16 x i16>, <16 x i16>, <16 x i16>) + +define <16 x i16> @lasx_xvmadd_h(<16 x i16> %va, <16 x i16> %vb, <16 x i16> %vc) nounwind { +; CHECK-LABEL: lasx_xvmadd_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmadd.h $xr0, $xr1, $xr2 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvmadd.h(<16 x i16> %va, <16 x i16> %vb, <16 x i16> %vc) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvmadd.w(<8 x i32>, <8 x i32>, <8 x i32>) + +define <8 x i32> @lasx_xvmadd_w(<8 x i32> %va, <8 x i32> %vb, <8 x i32> %vc) nounwind { +; CHECK-LABEL: lasx_xvmadd_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmadd.w $xr0, $xr1, $xr2 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvmadd.w(<8 x i32> %va, <8 x i32> %vb, <8 x i32> %vc) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvmadd.d(<4 x i64>, <4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvmadd_d(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) nounwind { +; CHECK-LABEL: lasx_xvmadd_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmadd.d $xr0, $xr1, $xr2 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvmadd.d(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-maddw.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-maddw.ll new file mode 100644 index 0000000000000000000000000000000000000000..146624a764a22e325ddf13375250fd034422b7ff --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-maddw.ll @@ -0,0 +1,290 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.b(<16 x i16>, <32 x i8>, <32 x i8>) + +define <16 x i16> @lasx_xvmaddwev_h_b(<16 x i16> %va, <32 x i8> %vb, <32 x i8> %vc) nounwind { +; CHECK-LABEL: lasx_xvmaddwev_h_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmaddwev.h.b $xr0, $xr1, $xr2 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.b(<16 x i16> %va, <32 x i8> %vb, <32 x i8> %vc) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.h(<8 x i32>, <16 x i16>, <16 x i16>) + +define <8 x i32> @lasx_xvmaddwev_w_h(<8 x i32> %va, <16 x i16> %vb, <16 x i16> %vc) nounwind { +; CHECK-LABEL: lasx_xvmaddwev_w_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmaddwev.w.h $xr0, $xr1, $xr2 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.h(<8 x i32> %va, <16 x i16> %vb, <16 x i16> %vc) + ret <8 x i32> %res +} + +declare 
<4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.w(<4 x i64>, <8 x i32>, <8 x i32>) + +define <4 x i64> @lasx_xvmaddwev_d_w(<4 x i64> %va, <8 x i32> %vb, <8 x i32> %vc) nounwind { +; CHECK-LABEL: lasx_xvmaddwev_d_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmaddwev.d.w $xr0, $xr1, $xr2 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.w(<4 x i64> %va, <8 x i32> %vb, <8 x i32> %vc) + ret <4 x i64> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.d(<4 x i64>, <4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvmaddwev_q_d(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) nounwind { +; CHECK-LABEL: lasx_xvmaddwev_q_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmaddwev.q.d $xr0, $xr1, $xr2 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.d(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) + ret <4 x i64> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu(<16 x i16>, <32 x i8>, <32 x i8>) + +define <16 x i16> @lasx_xvmaddwev_h_bu(<16 x i16> %va, <32 x i8> %vb, <32 x i8> %vc) nounwind { +; CHECK-LABEL: lasx_xvmaddwev_h_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmaddwev.h.bu $xr0, $xr1, $xr2 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu(<16 x i16> %va, <32 x i8> %vb, <32 x i8> %vc) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu(<8 x i32>, <16 x i16>, <16 x i16>) + +define <8 x i32> @lasx_xvmaddwev_w_hu(<8 x i32> %va, <16 x i16> %vb, <16 x i16> %vc) nounwind { +; CHECK-LABEL: lasx_xvmaddwev_w_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmaddwev.w.hu $xr0, $xr1, $xr2 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu(<8 x i32> %va, <16 x i16> %vb, <16 x i16> %vc) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu(<4 x i64>, <8 x i32>, <8 x i32>) + +define <4 x i64> @lasx_xvmaddwev_d_wu(<4 x i64> %va, <8 x i32> %vb, <8 x i32> %vc) nounwind { +; CHECK-LABEL: lasx_xvmaddwev_d_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmaddwev.d.wu $xr0, $xr1, $xr2 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu(<4 x i64> %va, <8 x i32> %vb, <8 x i32> %vc) + ret <4 x i64> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du(<4 x i64>, <4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvmaddwev_q_du(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) nounwind { +; CHECK-LABEL: lasx_xvmaddwev_q_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmaddwev.q.du $xr0, $xr1, $xr2 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) + ret <4 x i64> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu.b(<16 x i16>, <32 x i8>, <32 x i8>) + +define <16 x i16> @lasx_xvmaddwev_h_bu_b(<16 x i16> %va, <32 x i8> %vb, <32 x i8> %vc) nounwind { +; CHECK-LABEL: lasx_xvmaddwev_h_bu_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmaddwev.h.bu.b $xr0, $xr1, $xr2 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu.b(<16 x i16> %va, <32 x i8> %vb, <32 x i8> %vc) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu.h(<8 x i32>, <16 x i16>, <16 x i16>) + +define <8 x i32> @lasx_xvmaddwev_w_hu_h(<8 x i32> %va, <16 x i16> %vb, <16 x i16> %vc) nounwind { +; CHECK-LABEL: lasx_xvmaddwev_w_hu_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmaddwev.w.hu.h $xr0, $xr1, $xr2 +; CHECK-NEXT: ret +entry: + %res = call <8 x 
i32> @llvm.loongarch.lasx.xvmaddwev.w.hu.h(<8 x i32> %va, <16 x i16> %vb, <16 x i16> %vc) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu.w(<4 x i64>, <8 x i32>, <8 x i32>) + +define <4 x i64> @lasx_xvmaddwev_d_wu_w(<4 x i64> %va, <8 x i32> %vb, <8 x i32> %vc) nounwind { +; CHECK-LABEL: lasx_xvmaddwev_d_wu_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmaddwev.d.wu.w $xr0, $xr1, $xr2 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu.w(<4 x i64> %va, <8 x i32> %vb, <8 x i32> %vc) + ret <4 x i64> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du.d(<4 x i64>, <4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvmaddwev_q_du_d(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) nounwind { +; CHECK-LABEL: lasx_xvmaddwev_q_du_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmaddwev.q.du.d $xr0, $xr1, $xr2 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du.d(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) + ret <4 x i64> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.b(<16 x i16>, <32 x i8>, <32 x i8>) + +define <16 x i16> @lasx_xvmaddwod_h_b(<16 x i16> %va, <32 x i8> %vb, <32 x i8> %vc) nounwind { +; CHECK-LABEL: lasx_xvmaddwod_h_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmaddwod.h.b $xr0, $xr1, $xr2 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.b(<16 x i16> %va, <32 x i8> %vb, <32 x i8> %vc) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.h(<8 x i32>, <16 x i16>, <16 x i16>) + +define <8 x i32> @lasx_xvmaddwod_w_h(<8 x i32> %va, <16 x i16> %vb, <16 x i16> %vc) nounwind { +; CHECK-LABEL: lasx_xvmaddwod_w_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmaddwod.w.h $xr0, $xr1, $xr2 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.h(<8 x i32> %va, <16 x i16> %vb, <16 x i16> %vc) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.w(<4 x i64>, <8 x i32>, <8 x i32>) + +define <4 x i64> @lasx_xvmaddwod_d_w(<4 x i64> %va, <8 x i32> %vb, <8 x i32> %vc) nounwind { +; CHECK-LABEL: lasx_xvmaddwod_d_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmaddwod.d.w $xr0, $xr1, $xr2 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.w(<4 x i64> %va, <8 x i32> %vb, <8 x i32> %vc) + ret <4 x i64> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.d(<4 x i64>, <4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvmaddwod_q_d(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) nounwind { +; CHECK-LABEL: lasx_xvmaddwod_q_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmaddwod.q.d $xr0, $xr1, $xr2 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.d(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) + ret <4 x i64> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu(<16 x i16>, <32 x i8>, <32 x i8>) + +define <16 x i16> @lasx_xvmaddwod_h_bu(<16 x i16> %va, <32 x i8> %vb, <32 x i8> %vc) nounwind { +; CHECK-LABEL: lasx_xvmaddwod_h_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmaddwod.h.bu $xr0, $xr1, $xr2 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu(<16 x i16> %va, <32 x i8> %vb, <32 x i8> %vc) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu(<8 x i32>, <16 x i16>, <16 x i16>) + +define <8 x i32> @lasx_xvmaddwod_w_hu(<8 x i32> %va, <16 x i16> %vb, <16 x i16> %vc) nounwind { +; CHECK-LABEL: lasx_xvmaddwod_w_hu: +; 
CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmaddwod.w.hu $xr0, $xr1, $xr2 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu(<8 x i32> %va, <16 x i16> %vb, <16 x i16> %vc) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu(<4 x i64>, <8 x i32>, <8 x i32>) + +define <4 x i64> @lasx_xvmaddwod_d_wu(<4 x i64> %va, <8 x i32> %vb, <8 x i32> %vc) nounwind { +; CHECK-LABEL: lasx_xvmaddwod_d_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmaddwod.d.wu $xr0, $xr1, $xr2 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu(<4 x i64> %va, <8 x i32> %vb, <8 x i32> %vc) + ret <4 x i64> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du(<4 x i64>, <4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvmaddwod_q_du(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) nounwind { +; CHECK-LABEL: lasx_xvmaddwod_q_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmaddwod.q.du $xr0, $xr1, $xr2 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) + ret <4 x i64> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu.b(<16 x i16>, <32 x i8>, <32 x i8>) + +define <16 x i16> @lasx_xvmaddwod_h_bu_b(<16 x i16> %va, <32 x i8> %vb, <32 x i8> %vc) nounwind { +; CHECK-LABEL: lasx_xvmaddwod_h_bu_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmaddwod.h.bu.b $xr0, $xr1, $xr2 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu.b(<16 x i16> %va, <32 x i8> %vb, <32 x i8> %vc) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu.h(<8 x i32>, <16 x i16>, <16 x i16>) + +define <8 x i32> @lasx_xvmaddwod_w_hu_h(<8 x i32> %va, <16 x i16> %vb, <16 x i16> %vc) nounwind { +; CHECK-LABEL: lasx_xvmaddwod_w_hu_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmaddwod.w.hu.h $xr0, $xr1, $xr2 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu.h(<8 x i32> %va, <16 x i16> %vb, <16 x i16> %vc) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu.w(<4 x i64>, <8 x i32>, <8 x i32>) + +define <4 x i64> @lasx_xvmaddwod_d_wu_w(<4 x i64> %va, <8 x i32> %vb, <8 x i32> %vc) nounwind { +; CHECK-LABEL: lasx_xvmaddwod_d_wu_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmaddwod.d.wu.w $xr0, $xr1, $xr2 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu.w(<4 x i64> %va, <8 x i32> %vb, <8 x i32> %vc) + ret <4 x i64> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du.d(<4 x i64>, <4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvmaddwod_q_du_d(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) nounwind { +; CHECK-LABEL: lasx_xvmaddwod_q_du_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmaddwod.q.du.d $xr0, $xr1, $xr2 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du.d(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-max-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-max-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..a671e9979b2febf56334b9f6608c03da4f911f23 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-max-invalid-imm.ll @@ -0,0 +1,129 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvmaxi.b(<32 x i8>, i32) + +define <32 x i8> @lasx_xvmaxi_b_lo(<32 x i8> 
%va) nounwind { +; CHECK: llvm.loongarch.lasx.xvmaxi.b: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvmaxi.b(<32 x i8> %va, i32 -17) + ret <32 x i8> %res +} + +define <32 x i8> @lasx_xvmaxi_b_hi(<32 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvmaxi.b: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvmaxi.b(<32 x i8> %va, i32 16) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvmaxi.h(<16 x i16>, i32) + +define <16 x i16> @lasx_xvmaxi_h_lo(<16 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvmaxi.h: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvmaxi.h(<16 x i16> %va, i32 -17) + ret <16 x i16> %res +} + +define <16 x i16> @lasx_xvmaxi_h_hi(<16 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvmaxi.h: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvmaxi.h(<16 x i16> %va, i32 16) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvmaxi.w(<8 x i32>, i32) + +define <8 x i32> @lasx_xvmaxi_w_lo(<8 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvmaxi.w: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvmaxi.w(<8 x i32> %va, i32 -17) + ret <8 x i32> %res +} + +define <8 x i32> @lasx_xvmaxi_w_hi(<8 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvmaxi.w: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvmaxi.w(<8 x i32> %va, i32 16) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvmaxi.d(<4 x i64>, i32) + +define <4 x i64> @lasx_xvmaxi_d_lo(<4 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvmaxi.d: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvmaxi.d(<4 x i64> %va, i32 -17) + ret <4 x i64> %res +} + +define <4 x i64> @lasx_xvmaxi_d_hi(<4 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvmaxi.d: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvmaxi.d(<4 x i64> %va, i32 16) + ret <4 x i64> %res +} + +declare <32 x i8> @llvm.loongarch.lasx.xvmaxi.bu(<32 x i8>, i32) + +define <32 x i8> @lasx_xvmaxi_bu_lo(<32 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvmaxi.bu: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvmaxi.bu(<32 x i8> %va, i32 -1) + ret <32 x i8> %res +} + +define <32 x i8> @lasx_xvmaxi_bu_hi(<32 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvmaxi.bu: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvmaxi.bu(<32 x i8> %va, i32 32) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvmaxi.hu(<16 x i16>, i32) + +define <16 x i16> @lasx_xvmaxi_hu_lo(<16 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvmaxi.hu: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvmaxi.hu(<16 x i16> %va, i32 -1) + ret <16 x i16> %res +} + +define <16 x i16> @lasx_xvmaxi_hu_hi(<16 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvmaxi.hu: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvmaxi.hu(<16 x i16> %va, i32 32) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvmaxi.wu(<8 x i32>, i32) + +define <8 x i32> @lasx_xvmaxi_wu_lo(<8 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvmaxi.wu: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvmaxi.wu(<8 x i32> %va, i32 -1) + ret <8 x i32> %res +} + +define <8 x i32> @lasx_xvmaxi_wu_hi(<8 x i32> %va) nounwind { +; CHECK: 
llvm.loongarch.lasx.xvmaxi.wu: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvmaxi.wu(<8 x i32> %va, i32 32) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvmaxi.du(<4 x i64>, i32) + +define <4 x i64> @lasx_xvmaxi_du_lo(<4 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvmaxi.du: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvmaxi.du(<4 x i64> %va, i32 -1) + ret <4 x i64> %res +} + +define <4 x i64> @lasx_xvmaxi_du_hi(<4 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvmaxi.du: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvmaxi.du(<4 x i64> %va, i32 32) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-max-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-max-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..b85798b53c92d178d6b012b9027902444f337451 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-max-non-imm.ll @@ -0,0 +1,73 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvmaxi.b(<32 x i8>, i32) + +define <32 x i8> @lasx_xvmaxi_b(<32 x i8> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvmaxi.b(<32 x i8> %va, i32 %b) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvmaxi.h(<16 x i16>, i32) + +define <16 x i16> @lasx_xvmaxi_h(<16 x i16> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvmaxi.h(<16 x i16> %va, i32 %b) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvmaxi.w(<8 x i32>, i32) + +define <8 x i32> @lasx_xvmaxi_w(<8 x i32> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvmaxi.w(<8 x i32> %va, i32 %b) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvmaxi.d(<4 x i64>, i32) + +define <4 x i64> @lasx_xvmaxi_d(<4 x i64> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvmaxi.d(<4 x i64> %va, i32 %b) + ret <4 x i64> %res +} + +declare <32 x i8> @llvm.loongarch.lasx.xvmaxi.bu(<32 x i8>, i32) + +define <32 x i8> @lasx_xvmaxi_bu(<32 x i8> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvmaxi.bu(<32 x i8> %va, i32 %b) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvmaxi.hu(<16 x i16>, i32) + +define <16 x i16> @lasx_xvmaxi_hu(<16 x i16> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvmaxi.hu(<16 x i16> %va, i32 %b) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvmaxi.wu(<8 x i32>, i32) + +define <8 x i32> @lasx_xvmaxi_wu(<8 x i32> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvmaxi.wu(<8 x i32> %va, i32 %b) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvmaxi.du(<4 x i64>, i32) + +define <4 x i64> @lasx_xvmaxi_du(<4 x i64> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvmaxi.du(<4 x i64> %va, i32 %b) + ret <4 x i64> %res +} diff --git 
a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-max.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-max.ll new file mode 100644 index 0000000000000000000000000000000000000000..9cf09df4439ad924086ef3388bf60e37b4856430 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-max.ll @@ -0,0 +1,194 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvmax.b(<32 x i8>, <32 x i8>) + +define <32 x i8> @lasx_xvmax_b(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvmax_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmax.b $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvmax.b(<32 x i8> %va, <32 x i8> %vb) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvmax.h(<16 x i16>, <16 x i16>) + +define <16 x i16> @lasx_xvmax_h(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvmax_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmax.h $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvmax.h(<16 x i16> %va, <16 x i16> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvmax.w(<8 x i32>, <8 x i32>) + +define <8 x i32> @lasx_xvmax_w(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvmax_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmax.w $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvmax.w(<8 x i32> %va, <8 x i32> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvmax.d(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvmax_d(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvmax_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmax.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvmax.d(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} + +declare <32 x i8> @llvm.loongarch.lasx.xvmaxi.b(<32 x i8>, i32) + +define <32 x i8> @lasx_xvmaxi_b(<32 x i8> %va) nounwind { +; CHECK-LABEL: lasx_xvmaxi_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmaxi.b $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvmaxi.b(<32 x i8> %va, i32 1) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvmaxi.h(<16 x i16>, i32) + +define <16 x i16> @lasx_xvmaxi_h(<16 x i16> %va) nounwind { +; CHECK-LABEL: lasx_xvmaxi_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmaxi.h $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvmaxi.h(<16 x i16> %va, i32 1) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvmaxi.w(<8 x i32>, i32) + +define <8 x i32> @lasx_xvmaxi_w(<8 x i32> %va) nounwind { +; CHECK-LABEL: lasx_xvmaxi_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmaxi.w $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvmaxi.w(<8 x i32> %va, i32 1) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvmaxi.d(<4 x i64>, i32) + +define <4 x i64> @lasx_xvmaxi_d(<4 x i64> %va) nounwind { +; CHECK-LABEL: lasx_xvmaxi_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmaxi.d $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvmaxi.d(<4 x i64> %va, i32 1) + ret <4 x i64> %res +} + +declare <32 x i8> @llvm.loongarch.lasx.xvmax.bu(<32 x i8>, <32 x i8>) + +define <32 x i8> @lasx_xvmax_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: 
lasx_xvmax_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmax.bu $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvmax.bu(<32 x i8> %va, <32 x i8> %vb) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvmax.hu(<16 x i16>, <16 x i16>) + +define <16 x i16> @lasx_xvmax_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvmax_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmax.hu $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvmax.hu(<16 x i16> %va, <16 x i16> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvmax.wu(<8 x i32>, <8 x i32>) + +define <8 x i32> @lasx_xvmax_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvmax_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmax.wu $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvmax.wu(<8 x i32> %va, <8 x i32> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvmax.du(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvmax_du(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvmax_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmax.du $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvmax.du(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} + +declare <32 x i8> @llvm.loongarch.lasx.xvmaxi.bu(<32 x i8>, i32) + +define <32 x i8> @lasx_xvmaxi_bu(<32 x i8> %va) nounwind { +; CHECK-LABEL: lasx_xvmaxi_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmaxi.bu $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvmaxi.bu(<32 x i8> %va, i32 1) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvmaxi.hu(<16 x i16>, i32) + +define <16 x i16> @lasx_xvmaxi_hu(<16 x i16> %va) nounwind { +; CHECK-LABEL: lasx_xvmaxi_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmaxi.hu $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvmaxi.hu(<16 x i16> %va, i32 1) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvmaxi.wu(<8 x i32>, i32) + +define <8 x i32> @lasx_xvmaxi_wu(<8 x i32> %va) nounwind { +; CHECK-LABEL: lasx_xvmaxi_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmaxi.wu $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvmaxi.wu(<8 x i32> %va, i32 1) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvmaxi.du(<4 x i64>, i32) + +define <4 x i64> @lasx_xvmaxi_du(<4 x i64> %va) nounwind { +; CHECK-LABEL: lasx_xvmaxi_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmaxi.du $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvmaxi.du(<4 x i64> %va, i32 1) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-min-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-min-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..5ed4104c295fab07d323ab7c559ec8c97cc0f026 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-min-invalid-imm.ll @@ -0,0 +1,129 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvmini.b(<32 x i8>, i32) + +define <32 x i8> @lasx_xvmini_b_lo(<32 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvmini.b: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvmini.b(<32 x i8> %va, i32 -17) + ret <32 x i8> %res +} + +define 
<32 x i8> @lasx_xvmini_b_hi(<32 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvmini.b: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvmini.b(<32 x i8> %va, i32 16) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvmini.h(<16 x i16>, i32) + +define <16 x i16> @lasx_xvmini_h_lo(<16 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvmini.h: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvmini.h(<16 x i16> %va, i32 -17) + ret <16 x i16> %res +} + +define <16 x i16> @lasx_xvmini_h_hi(<16 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvmini.h: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvmini.h(<16 x i16> %va, i32 16) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvmini.w(<8 x i32>, i32) + +define <8 x i32> @lasx_xvmini_w_lo(<8 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvmini.w: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvmini.w(<8 x i32> %va, i32 -17) + ret <8 x i32> %res +} + +define <8 x i32> @lasx_xvmini_w_hi(<8 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvmini.w: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvmini.w(<8 x i32> %va, i32 16) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvmini.d(<4 x i64>, i32) + +define <4 x i64> @lasx_xvmini_d_lo(<4 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvmini.d: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvmini.d(<4 x i64> %va, i32 -17) + ret <4 x i64> %res +} + +define <4 x i64> @lasx_xvmini_d_hi(<4 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvmini.d: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvmini.d(<4 x i64> %va, i32 16) + ret <4 x i64> %res +} + +declare <32 x i8> @llvm.loongarch.lasx.xvmini.bu(<32 x i8>, i32) + +define <32 x i8> @lasx_xvmini_bu_lo(<32 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvmini.bu: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvmini.bu(<32 x i8> %va, i32 -1) + ret <32 x i8> %res +} + +define <32 x i8> @lasx_xvmini_bu_hi(<32 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvmini.bu: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvmini.bu(<32 x i8> %va, i32 32) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvmini.hu(<16 x i16>, i32) + +define <16 x i16> @lasx_xvmini_hu_lo(<16 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvmini.hu: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvmini.hu(<16 x i16> %va, i32 -1) + ret <16 x i16> %res +} + +define <16 x i16> @lasx_xvmini_hu_hi(<16 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvmini.hu: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvmini.hu(<16 x i16> %va, i32 32) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvmini.wu(<8 x i32>, i32) + +define <8 x i32> @lasx_xvmini_wu_lo(<8 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvmini.wu: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvmini.wu(<8 x i32> %va, i32 -1) + ret <8 x i32> %res +} + +define <8 x i32> @lasx_xvmini_wu_hi(<8 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvmini.wu: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvmini.wu(<8 x i32> %va, i32 32) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvmini.du(<4 x 
i64>, i32) + +define <4 x i64> @lasx_xvmini_du_lo(<4 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvmini.du: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvmini.du(<4 x i64> %va, i32 -1) + ret <4 x i64> %res +} + +define <4 x i64> @lasx_xvmini_du_hi(<4 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvmini.du: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvmini.du(<4 x i64> %va, i32 32) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-min-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-min-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..b81931977aad43b725d943dda9fd4d7fe3d4ecd2 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-min-non-imm.ll @@ -0,0 +1,73 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvmini.b(<32 x i8>, i32) + +define <32 x i8> @lasx_xvmini_b(<32 x i8> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvmini.b(<32 x i8> %va, i32 %b) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvmini.h(<16 x i16>, i32) + +define <16 x i16> @lasx_xvmini_h(<16 x i16> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvmini.h(<16 x i16> %va, i32 %b) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvmini.w(<8 x i32>, i32) + +define <8 x i32> @lasx_xvmini_w(<8 x i32> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvmini.w(<8 x i32> %va, i32 %b) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvmini.d(<4 x i64>, i32) + +define <4 x i64> @lasx_xvmini_d(<4 x i64> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvmini.d(<4 x i64> %va, i32 %b) + ret <4 x i64> %res +} + +declare <32 x i8> @llvm.loongarch.lasx.xvmini.bu(<32 x i8>, i32) + +define <32 x i8> @lasx_xvmini_bu(<32 x i8> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvmini.bu(<32 x i8> %va, i32 %b) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvmini.hu(<16 x i16>, i32) + +define <16 x i16> @lasx_xvmini_hu(<16 x i16> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvmini.hu(<16 x i16> %va, i32 %b) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvmini.wu(<8 x i32>, i32) + +define <8 x i32> @lasx_xvmini_wu(<8 x i32> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvmini.wu(<8 x i32> %va, i32 %b) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvmini.du(<4 x i64>, i32) + +define <4 x i64> @lasx_xvmini_du(<4 x i64> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvmini.du(<4 x i64> %va, i32 %b) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-min.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-min.ll new file mode 100644 index 0000000000000000000000000000000000000000..c94b1e4ea44cb7712c7a22e22540e8db1b2621ab --- /dev/null +++ 
b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-min.ll @@ -0,0 +1,194 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvmin.b(<32 x i8>, <32 x i8>) + +define <32 x i8> @lasx_xvmin_b(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvmin_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmin.b $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvmin.b(<32 x i8> %va, <32 x i8> %vb) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvmin.h(<16 x i16>, <16 x i16>) + +define <16 x i16> @lasx_xvmin_h(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvmin_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmin.h $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvmin.h(<16 x i16> %va, <16 x i16> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvmin.w(<8 x i32>, <8 x i32>) + +define <8 x i32> @lasx_xvmin_w(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvmin_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmin.w $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvmin.w(<8 x i32> %va, <8 x i32> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvmin.d(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvmin_d(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvmin_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmin.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvmin.d(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} + +declare <32 x i8> @llvm.loongarch.lasx.xvmini.b(<32 x i8>, i32) + +define <32 x i8> @lasx_xvmini_b(<32 x i8> %va) nounwind { +; CHECK-LABEL: lasx_xvmini_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmini.b $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvmini.b(<32 x i8> %va, i32 1) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvmini.h(<16 x i16>, i32) + +define <16 x i16> @lasx_xvmini_h(<16 x i16> %va) nounwind { +; CHECK-LABEL: lasx_xvmini_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmini.h $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvmini.h(<16 x i16> %va, i32 1) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvmini.w(<8 x i32>, i32) + +define <8 x i32> @lasx_xvmini_w(<8 x i32> %va) nounwind { +; CHECK-LABEL: lasx_xvmini_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmini.w $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvmini.w(<8 x i32> %va, i32 1) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvmini.d(<4 x i64>, i32) + +define <4 x i64> @lasx_xvmini_d(<4 x i64> %va) nounwind { +; CHECK-LABEL: lasx_xvmini_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmini.d $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvmini.d(<4 x i64> %va, i32 1) + ret <4 x i64> %res +} + +declare <32 x i8> @llvm.loongarch.lasx.xvmin.bu(<32 x i8>, <32 x i8>) + +define <32 x i8> @lasx_xvmin_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvmin_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmin.bu $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvmin.bu(<32 x i8> %va, <32 x i8> %vb) + ret <32 x i8> %res +} + +declare 
<16 x i16> @llvm.loongarch.lasx.xvmin.hu(<16 x i16>, <16 x i16>) + +define <16 x i16> @lasx_xvmin_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvmin_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmin.hu $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvmin.hu(<16 x i16> %va, <16 x i16> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvmin.wu(<8 x i32>, <8 x i32>) + +define <8 x i32> @lasx_xvmin_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvmin_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmin.wu $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvmin.wu(<8 x i32> %va, <8 x i32> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvmin.du(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvmin_du(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvmin_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmin.du $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvmin.du(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} + +declare <32 x i8> @llvm.loongarch.lasx.xvmini.bu(<32 x i8>, i32) + +define <32 x i8> @lasx_xvmini_bu(<32 x i8> %va) nounwind { +; CHECK-LABEL: lasx_xvmini_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmini.bu $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvmini.bu(<32 x i8> %va, i32 1) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvmini.hu(<16 x i16>, i32) + +define <16 x i16> @lasx_xvmini_hu(<16 x i16> %va) nounwind { +; CHECK-LABEL: lasx_xvmini_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmini.hu $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvmini.hu(<16 x i16> %va, i32 1) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvmini.wu(<8 x i32>, i32) + +define <8 x i32> @lasx_xvmini_wu(<8 x i32> %va) nounwind { +; CHECK-LABEL: lasx_xvmini_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmini.wu $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvmini.wu(<8 x i32> %va, i32 1) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvmini.du(<4 x i64>, i32) + +define <4 x i64> @lasx_xvmini_du(<4 x i64> %va) nounwind { +; CHECK-LABEL: lasx_xvmini_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmini.du $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvmini.du(<4 x i64> %va, i32 1) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mod.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mod.ll new file mode 100644 index 0000000000000000000000000000000000000000..a177246bb23508a24159fe4fcda46423d50bdad6 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mod.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvmod.b(<32 x i8>, <32 x i8>) + +define <32 x i8> @lasx_xvmod_b(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvmod_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmod.b $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvmod.b(<32 x i8> %va, <32 x i8> %vb) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvmod.h(<16 x i16>, <16 x i16>) + +define <16 x i16> @lasx_xvmod_h(<16 x i16> %va, <16 
x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvmod_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmod.h $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvmod.h(<16 x i16> %va, <16 x i16> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvmod.w(<8 x i32>, <8 x i32>) + +define <8 x i32> @lasx_xvmod_w(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvmod_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmod.w $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvmod.w(<8 x i32> %va, <8 x i32> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvmod.d(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvmod_d(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvmod_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmod.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvmod.d(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} + +declare <32 x i8> @llvm.loongarch.lasx.xvmod.bu(<32 x i8>, <32 x i8>) + +define <32 x i8> @lasx_xvmod_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvmod_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmod.bu $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvmod.bu(<32 x i8> %va, <32 x i8> %vb) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvmod.hu(<16 x i16>, <16 x i16>) + +define <16 x i16> @lasx_xvmod_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvmod_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmod.hu $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvmod.hu(<16 x i16> %va, <16 x i16> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvmod.wu(<8 x i32>, <8 x i32>) + +define <8 x i32> @lasx_xvmod_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvmod_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmod.wu $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvmod.wu(<8 x i32> %va, <8 x i32> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvmod.du(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvmod_du(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvmod_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmod.du $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvmod.du(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mskgez.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mskgez.ll new file mode 100644 index 0000000000000000000000000000000000000000..da87c20ad6ee0ae0ba3e20c762bb8b6dbb09b9d8 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mskgez.ll @@ -0,0 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvmskgez.b(<32 x i8>) + +define <32 x i8> @lasx_xvmskgez_b(<32 x i8> %va) nounwind { +; CHECK-LABEL: lasx_xvmskgez_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmskgez.b $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvmskgez.b(<32 x i8> %va) + ret <32 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mskltz.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mskltz.ll new file mode 100644 index 
0000000000000000000000000000000000000000..b2218487535c634371707d763bcba6b54af856d9 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mskltz.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvmskltz.b(<32 x i8>) + +define <32 x i8> @lasx_xvmskltz_b(<32 x i8> %va) nounwind { +; CHECK-LABEL: lasx_xvmskltz_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmskltz.b $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvmskltz.b(<32 x i8> %va) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvmskltz.h(<16 x i16>) + +define <16 x i16> @lasx_xvmskltz_h(<16 x i16> %va) nounwind { +; CHECK-LABEL: lasx_xvmskltz_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmskltz.h $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvmskltz.h(<16 x i16> %va) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvmskltz.w(<8 x i32>) + +define <8 x i32> @lasx_xvmskltz_w(<8 x i32> %va) nounwind { +; CHECK-LABEL: lasx_xvmskltz_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmskltz.w $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvmskltz.w(<8 x i32> %va) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvmskltz.d(<4 x i64>) + +define <4 x i64> @lasx_xvmskltz_d(<4 x i64> %va) nounwind { +; CHECK-LABEL: lasx_xvmskltz_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmskltz.d $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvmskltz.d(<4 x i64> %va) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-msknz.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-msknz.ll new file mode 100644 index 0000000000000000000000000000000000000000..becd2c883a7ed71d9f232f8430fdb90cdc607f02 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-msknz.ll @@ -0,0 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvmsknz.b(<32 x i8>) + +define <32 x i8> @lasx_xvmsknz_b(<32 x i8> %va) nounwind { +; CHECK-LABEL: lasx_xvmsknz_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmsknz.b $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvmsknz.b(<32 x i8> %va) + ret <32 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-msub.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-msub.ll new file mode 100644 index 0000000000000000000000000000000000000000..c89f9578b77d7fe760d9255ab1039c0f35430af4 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-msub.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvmsub.b(<32 x i8>, <32 x i8>, <32 x i8>) + +define <32 x i8> @lasx_xvmsub_b(<32 x i8> %va, <32 x i8> %vb, <32 x i8> %vc) nounwind { +; CHECK-LABEL: lasx_xvmsub_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmsub.b $xr0, $xr1, $xr2 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvmsub.b(<32 x i8> %va, <32 x i8> %vb, <32 x i8> %vc) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvmsub.h(<16 x i16>, <16 x i16>, <16 x i16>) + +define <16 x i16> @lasx_xvmsub_h(<16 x i16> %va, <16 x 
i16> %vb, <16 x i16> %vc) nounwind { +; CHECK-LABEL: lasx_xvmsub_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmsub.h $xr0, $xr1, $xr2 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvmsub.h(<16 x i16> %va, <16 x i16> %vb, <16 x i16> %vc) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvmsub.w(<8 x i32>, <8 x i32>, <8 x i32>) + +define <8 x i32> @lasx_xvmsub_w(<8 x i32> %va, <8 x i32> %vb, <8 x i32> %vc) nounwind { +; CHECK-LABEL: lasx_xvmsub_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmsub.w $xr0, $xr1, $xr2 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvmsub.w(<8 x i32> %va, <8 x i32> %vb, <8 x i32> %vc) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvmsub.d(<4 x i64>, <4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvmsub_d(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) nounwind { +; CHECK-LABEL: lasx_xvmsub_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmsub.d $xr0, $xr1, $xr2 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvmsub.d(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-muh.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-muh.ll new file mode 100644 index 0000000000000000000000000000000000000000..97461512ce1665fb38afb5f601a432d3edd950fe --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-muh.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvmuh.b(<32 x i8>, <32 x i8>) + +define <32 x i8> @lasx_xvmuh_b(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvmuh_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmuh.b $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvmuh.b(<32 x i8> %va, <32 x i8> %vb) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvmuh.h(<16 x i16>, <16 x i16>) + +define <16 x i16> @lasx_xvmuh_h(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvmuh_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmuh.h $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvmuh.h(<16 x i16> %va, <16 x i16> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvmuh.w(<8 x i32>, <8 x i32>) + +define <8 x i32> @lasx_xvmuh_w(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvmuh_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmuh.w $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvmuh.w(<8 x i32> %va, <8 x i32> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvmuh.d(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvmuh_d(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvmuh_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmuh.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvmuh.d(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} + +declare <32 x i8> @llvm.loongarch.lasx.xvmuh.bu(<32 x i8>, <32 x i8>) + +define <32 x i8> @lasx_xvmuh_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvmuh_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmuh.bu $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvmuh.bu(<32 x i8> %va, <32 x i8> %vb) + ret <32 x i8> %res +} + +declare <16 x i16> 
@llvm.loongarch.lasx.xvmuh.hu(<16 x i16>, <16 x i16>) + +define <16 x i16> @lasx_xvmuh_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvmuh_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmuh.hu $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvmuh.hu(<16 x i16> %va, <16 x i16> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvmuh.wu(<8 x i32>, <8 x i32>) + +define <8 x i32> @lasx_xvmuh_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvmuh_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmuh.wu $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvmuh.wu(<8 x i32> %va, <8 x i32> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvmuh.du(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvmuh_du(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvmuh_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmuh.du $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvmuh.du(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mul.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mul.ll new file mode 100644 index 0000000000000000000000000000000000000000..d5d852e58a9f9ce6573b02e41bbc58f0f083b19b --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mul.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvmul.b(<32 x i8>, <32 x i8>) + +define <32 x i8> @lasx_xvmul_b(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvmul_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmul.b $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvmul.b(<32 x i8> %va, <32 x i8> %vb) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvmul.h(<16 x i16>, <16 x i16>) + +define <16 x i16> @lasx_xvmul_h(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvmul_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmul.h $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvmul.h(<16 x i16> %va, <16 x i16> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvmul.w(<8 x i32>, <8 x i32>) + +define <8 x i32> @lasx_xvmul_w(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvmul_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmul.w $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvmul.w(<8 x i32> %va, <8 x i32> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvmul.d(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvmul_d(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvmul_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmul.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvmul.d(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mulw.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mulw.ll new file mode 100644 index 0000000000000000000000000000000000000000..f69e64aa76980e852534c185ced9958301d87731 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-mulw.ll @@ -0,0 +1,290 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 
--mattr=+lasx < %s | FileCheck %s + +declare <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.b(<32 x i8>, <32 x i8>) + +define <16 x i16> @lasx_xvmulwev_h_b(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvmulwev_h_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmulwev.h.b $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.b(<32 x i8> %va, <32 x i8> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.h(<16 x i16>, <16 x i16>) + +define <8 x i32> @lasx_xvmulwev_w_h(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvmulwev_w_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmulwev.w.h $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.h(<16 x i16> %va, <16 x i16> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.w(<8 x i32>, <8 x i32>) + +define <4 x i64> @lasx_xvmulwev_d_w(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvmulwev_d_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmulwev.d.w $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.w(<8 x i32> %va, <8 x i32> %vb) + ret <4 x i64> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.d(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvmulwev_q_d(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvmulwev_q_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmulwev.q.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.d(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu(<32 x i8>, <32 x i8>) + +define <16 x i16> @lasx_xvmulwev_h_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvmulwev_h_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmulwev.h.bu $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu(<32 x i8> %va, <32 x i8> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu(<16 x i16>, <16 x i16>) + +define <8 x i32> @lasx_xvmulwev_w_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvmulwev_w_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmulwev.w.hu $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu(<16 x i16> %va, <16 x i16> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu(<8 x i32>, <8 x i32>) + +define <4 x i64> @lasx_xvmulwev_d_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvmulwev_d_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmulwev.d.wu $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu(<8 x i32> %va, <8 x i32> %vb) + ret <4 x i64> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvmulwev_q_du(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvmulwev_q_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmulwev.q.du $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu.b(<32 x i8>, <32 x i8>) + +define <16 x i16> @lasx_xvmulwev_h_bu_b(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvmulwev_h_bu_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: 
xvmulwev.h.bu.b $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu.b(<32 x i8> %va, <32 x i8> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu.h(<16 x i16>, <16 x i16>) + +define <8 x i32> @lasx_xvmulwev_w_hu_h(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvmulwev_w_hu_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmulwev.w.hu.h $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu.h(<16 x i16> %va, <16 x i16> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu.w(<8 x i32>, <8 x i32>) + +define <4 x i64> @lasx_xvmulwev_d_wu_w(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvmulwev_d_wu_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmulwev.d.wu.w $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu.w(<8 x i32> %va, <8 x i32> %vb) + ret <4 x i64> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du.d(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvmulwev_q_du_d(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvmulwev_q_du_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmulwev.q.du.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du.d(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.b(<32 x i8>, <32 x i8>) + +define <16 x i16> @lasx_xvmulwod_h_b(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvmulwod_h_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmulwod.h.b $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.b(<32 x i8> %va, <32 x i8> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.h(<16 x i16>, <16 x i16>) + +define <8 x i32> @lasx_xvmulwod_w_h(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvmulwod_w_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmulwod.w.h $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.h(<16 x i16> %va, <16 x i16> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.w(<8 x i32>, <8 x i32>) + +define <4 x i64> @lasx_xvmulwod_d_w(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvmulwod_d_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmulwod.d.w $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.w(<8 x i32> %va, <8 x i32> %vb) + ret <4 x i64> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.d(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvmulwod_q_d(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvmulwod_q_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmulwod.q.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.d(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu(<32 x i8>, <32 x i8>) + +define <16 x i16> @lasx_xvmulwod_h_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvmulwod_h_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmulwod.h.bu $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu(<32 x i8> %va, <32 x i8> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu(<16 x i16>, <16 x 
i16>) + +define <8 x i32> @lasx_xvmulwod_w_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvmulwod_w_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmulwod.w.hu $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu(<16 x i16> %va, <16 x i16> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu(<8 x i32>, <8 x i32>) + +define <4 x i64> @lasx_xvmulwod_d_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvmulwod_d_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmulwod.d.wu $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu(<8 x i32> %va, <8 x i32> %vb) + ret <4 x i64> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvmulwod_q_du(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvmulwod_q_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmulwod.q.du $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu.b(<32 x i8>, <32 x i8>) + +define <16 x i16> @lasx_xvmulwod_h_bu_b(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvmulwod_h_bu_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmulwod.h.bu.b $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu.b(<32 x i8> %va, <32 x i8> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu.h(<16 x i16>, <16 x i16>) + +define <8 x i32> @lasx_xvmulwod_w_hu_h(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvmulwod_w_hu_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmulwod.w.hu.h $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu.h(<16 x i16> %va, <16 x i16> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu.w(<8 x i32>, <8 x i32>) + +define <4 x i64> @lasx_xvmulwod_d_wu_w(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvmulwod_d_wu_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmulwod.d.wu.w $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu.w(<8 x i32> %va, <8 x i32> %vb) + ret <4 x i64> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du.d(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvmulwod_q_du_d(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvmulwod_q_du_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvmulwod.q.du.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du.d(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-neg.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-neg.ll new file mode 100644 index 0000000000000000000000000000000000000000..ecbedf33465787ced0a707dc773e067b86e0c97f --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-neg.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvneg.b(<32 x i8>) + +define <32 x i8> @lasx_xvneg_b(<32 x i8> %va) nounwind { +; CHECK-LABEL: lasx_xvneg_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvneg.b $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = 
call <32 x i8> @llvm.loongarch.lasx.xvneg.b(<32 x i8> %va) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvneg.h(<16 x i16>) + +define <16 x i16> @lasx_xvneg_h(<16 x i16> %va) nounwind { +; CHECK-LABEL: lasx_xvneg_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvneg.h $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvneg.h(<16 x i16> %va) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvneg.w(<8 x i32>) + +define <8 x i32> @lasx_xvneg_w(<8 x i32> %va) nounwind { +; CHECK-LABEL: lasx_xvneg_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvneg.w $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvneg.w(<8 x i32> %va) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvneg.d(<4 x i64>) + +define <4 x i64> @lasx_xvneg_d(<4 x i64> %va) nounwind { +; CHECK-LABEL: lasx_xvneg_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvneg.d $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvneg.d(<4 x i64> %va) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-nor.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-nor.ll new file mode 100644 index 0000000000000000000000000000000000000000..674746b7624ec6c9a76ede0b49f77a4d48e3cc56 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-nor.ll @@ -0,0 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvnor.v(<32 x i8>, <32 x i8>) + +define <32 x i8> @lasx_xvnor_v(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvnor_v: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvnor.v $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvnor.v(<32 x i8> %va, <32 x i8> %vb) + ret <32 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-nori-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-nori-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..1130e094bf1f97ef86b0b9f4d743b84ff62f38ef --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-nori-invalid-imm.ll @@ -0,0 +1,17 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvnori.b(<32 x i8>, i32) + +define <32 x i8> @lasx_xvnori_b_lo(<32 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvnori.b: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvnori.b(<32 x i8> %va, i32 -1) + ret <32 x i8> %res +} + +define <32 x i8> @lasx_xvnori_b_hi(<32 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvnori.b: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvnori.b(<32 x i8> %va, i32 256) + ret <32 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-nori-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-nori-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..8f2333064d642fd0b4ce96295f9604828f301e2c --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-nori-non-imm.ll @@ -0,0 +1,10 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvnori.b(<32 x i8>, i32) + +define <32 x i8> @lasx_xvnori_b(<32 x i8> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <32 x i8> 
@llvm.loongarch.lasx.xvnori.b(<32 x i8> %va, i32 %b) + ret <32 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-nori.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-nori.ll new file mode 100644 index 0000000000000000000000000000000000000000..55eebf87ee921e054776d26c5ffa430191441e3a --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-nori.ll @@ -0,0 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvnori.b(<32 x i8>, i32) + +define <32 x i8> @lasx_xvnori_b(<32 x i8> %va) nounwind { +; CHECK-LABEL: lasx_xvnori_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvnori.b $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvnori.b(<32 x i8> %va, i32 1) + ret <32 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-or.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-or.ll new file mode 100644 index 0000000000000000000000000000000000000000..16462cfafc54a1ea1c48253ae8aa6f5cfa654a71 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-or.ll @@ -0,0 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvor.v(<32 x i8>, <32 x i8>) + +define <32 x i8> @lasx_xvor_v(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvor_v: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvor.v(<32 x i8> %va, <32 x i8> %vb) + ret <32 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ori-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ori-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..90dec8e55f2d83b60630909ccd5b800d23e7723d --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ori-invalid-imm.ll @@ -0,0 +1,17 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvori.b(<32 x i8>, i32) + +define <32 x i8> @lasx_xvori_b_lo(<32 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvori.b: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvori.b(<32 x i8> %va, i32 -1) + ret <32 x i8> %res +} + +define <32 x i8> @lasx_xvori_b_hi(<32 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvori.b: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvori.b(<32 x i8> %va, i32 256) + ret <32 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ori-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ori-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..ae6571d98f4af186c6bf8a604d2ae0cfdc162276 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ori-non-imm.ll @@ -0,0 +1,10 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvori.b(<32 x i8>, i32) + +define <32 x i8> @lasx_xvori_b(<32 x i8> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvori.b(<32 x i8> %va, i32 %b) + ret <32 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ori.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ori.ll new file mode 100644 index 
0000000000000000000000000000000000000000..8e53d88bac37460f3206a0935fbdb4d87f55cfa6 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ori.ll @@ -0,0 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvori.b(<32 x i8>, i32) + +define <32 x i8> @lasx_xvori_b(<32 x i8> %va) nounwind { +; CHECK-LABEL: lasx_xvori_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvori.b $xr0, $xr0, 3 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvori.b(<32 x i8> %va, i32 3) + ret <32 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-orn.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-orn.ll new file mode 100644 index 0000000000000000000000000000000000000000..3a335cdd3716705f7e2a34a59fac279f1e51ff13 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-orn.ll @@ -0,0 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvorn.v(<32 x i8>, <32 x i8>) + +define <32 x i8> @lasx_xvorn_v(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvorn_v: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvorn.v $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvorn.v(<32 x i8> %va, <32 x i8> %vb) + ret <32 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pack.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pack.ll new file mode 100644 index 0000000000000000000000000000000000000000..512b3023491720d6df35d2c626f9daef6fafb742 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pack.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvpackev.b(<32 x i8>, <32 x i8>) + +define <32 x i8> @lasx_xvpackev_b(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvpackev_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvpackev.b $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvpackev.b(<32 x i8> %va, <32 x i8> %vb) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvpackev.h(<16 x i16>, <16 x i16>) + +define <16 x i16> @lasx_xvpackev_h(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvpackev_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvpackev.h $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvpackev.h(<16 x i16> %va, <16 x i16> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvpackev.w(<8 x i32>, <8 x i32>) + +define <8 x i32> @lasx_xvpackev_w(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvpackev_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvpackev.w $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvpackev.w(<8 x i32> %va, <8 x i32> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvpackev.d(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvpackev_d(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvpackev_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvpackev.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvpackev.d(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} + 
+declare <32 x i8> @llvm.loongarch.lasx.xvpackod.b(<32 x i8>, <32 x i8>) + +define <32 x i8> @lasx_xvpackod_b(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvpackod_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvpackod.b $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvpackod.b(<32 x i8> %va, <32 x i8> %vb) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvpackod.h(<16 x i16>, <16 x i16>) + +define <16 x i16> @lasx_xvpackod_h(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvpackod_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvpackod.h $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvpackod.h(<16 x i16> %va, <16 x i16> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvpackod.w(<8 x i32>, <8 x i32>) + +define <8 x i32> @lasx_xvpackod_w(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvpackod_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvpackod.w $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvpackod.w(<8 x i32> %va, <8 x i32> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvpackod.d(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvpackod_d(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvpackod_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvpackod.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvpackod.d(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pcnt.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pcnt.ll new file mode 100644 index 0000000000000000000000000000000000000000..d77f1d2082c8d7209e401a481d9fcceeb546515a --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pcnt.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvpcnt.b(<32 x i8>) + +define <32 x i8> @lasx_xvpcnt_b(<32 x i8> %va) nounwind { +; CHECK-LABEL: lasx_xvpcnt_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvpcnt.b $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvpcnt.b(<32 x i8> %va) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvpcnt.h(<16 x i16>) + +define <16 x i16> @lasx_xvpcnt_h(<16 x i16> %va) nounwind { +; CHECK-LABEL: lasx_xvpcnt_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvpcnt.h $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvpcnt.h(<16 x i16> %va) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvpcnt.w(<8 x i32>) + +define <8 x i32> @lasx_xvpcnt_w(<8 x i32> %va) nounwind { +; CHECK-LABEL: lasx_xvpcnt_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvpcnt.w $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvpcnt.w(<8 x i32> %va) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvpcnt.d(<4 x i64>) + +define <4 x i64> @lasx_xvpcnt_d(<4 x i64> %va) nounwind { +; CHECK-LABEL: lasx_xvpcnt_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvpcnt.d $xr0, $xr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvpcnt.d(<4 x i64> %va) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-perm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-perm.ll new file mode 100644 index 
0000000000000000000000000000000000000000..4ec434edd4ec73af6608bd9912b862aca1e295da --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-perm.ll @@ -0,0 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <8 x i32> @llvm.loongarch.lasx.xvperm.w(<8 x i32>, <8 x i32>) + +define <8 x i32> @lasx_xvperm_w(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvperm_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvperm.w $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvperm.w(<8 x i32> %va, <8 x i32> %vb) + ret <8 x i32> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-permi-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-permi-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..41f4856bd8f71d29e489fcc4efc0ab35d4886dec --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-permi-invalid-imm.ll @@ -0,0 +1,49 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <8 x i32> @llvm.loongarch.lasx.xvpermi.w(<8 x i32>, <8 x i32>, i32) + +define <8 x i32> @lasx_xvpermi_w_lo(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvpermi.w: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvpermi.w(<8 x i32> %va, <8 x i32> %vb, i32 -1) + ret <8 x i32> %res +} + +define <8 x i32> @lasx_xvpermi_w_hi(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvpermi.w: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvpermi.w(<8 x i32> %va, <8 x i32> %vb, i32 256) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvpermi.d(<4 x i64>, i32) + +define <4 x i64> @lasx_xvpermi_d_lo(<4 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvpermi.d: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvpermi.d(<4 x i64> %va, i32 -1) + ret <4 x i64> %res +} + +define <4 x i64> @lasx_xvpermi_d_hi(<4 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvpermi.d: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvpermi.d(<4 x i64> %va, i32 256) + ret <4 x i64> %res +} + +declare <32 x i8> @llvm.loongarch.lasx.xvpermi.q(<32 x i8>, <32 x i8>, i32) + +define <32 x i8> @lasx_xvpermi_q_lo(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvpermi.q: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvpermi.q(<32 x i8> %va, <32 x i8> %vb, i32 -1) + ret <32 x i8> %res +} + +define <32 x i8> @lasx_xvpermi_q_hi(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvpermi.q: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvpermi.q(<32 x i8> %va, <32 x i8> %vb, i32 256) + ret <32 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-permi-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-permi-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..afb335c5d6cabd2274695209a3ac48d2484e0edd --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-permi-non-imm.ll @@ -0,0 +1,28 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <8 x i32> @llvm.loongarch.lasx.xvpermi.w(<8 x i32>, <8 x i32>, i32) + +define <8 x i32> @lasx_xvpermi_w(<8 x i32> %va, <8 x i32> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: 
+ %res = call <8 x i32> @llvm.loongarch.lasx.xvpermi.w(<8 x i32> %va, <8 x i32> %vb, i32 %c) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvpermi.d(<4 x i64>, i32) + +define <4 x i64> @lasx_xvpermi_d(<4 x i64> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvpermi.d(<4 x i64> %va, i32 %b) + ret <4 x i64> %res +} + +declare <32 x i8> @llvm.loongarch.lasx.xvpermi.q(<32 x i8>, <32 x i8>, i32) + +define <32 x i8> @lasx_xvpermi_q(<32 x i8> %va, <32 x i8> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvpermi.q(<32 x i8> %va, <32 x i8> %vb, i32 %c) + ret <32 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-permi.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-permi.ll new file mode 100644 index 0000000000000000000000000000000000000000..0d9f9daabc44887efdda3375c15dd7e39490315a --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-permi.ll @@ -0,0 +1,38 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <8 x i32> @llvm.loongarch.lasx.xvpermi.w(<8 x i32>, <8 x i32>, i32) + +define <8 x i32> @lasx_xvpermi_w(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvpermi_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvpermi.w $xr0, $xr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvpermi.w(<8 x i32> %va, <8 x i32> %vb, i32 1) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvpermi.d(<4 x i64>, i32) + +define <4 x i64> @lasx_xvpermi_d(<4 x i64> %va) nounwind { +; CHECK-LABEL: lasx_xvpermi_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvpermi.d $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvpermi.d(<4 x i64> %va, i32 1) + ret <4 x i64> %res +} + +declare <32 x i8> @llvm.loongarch.lasx.xvpermi.q(<32 x i8>, <32 x i8>, i32) + +define <32 x i8> @lasx_xvpermi_q(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvpermi_q: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvpermi.q $xr0, $xr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvpermi.q(<32 x i8> %va, <32 x i8> %vb, i32 1) + ret <32 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pick.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pick.ll new file mode 100644 index 0000000000000000000000000000000000000000..bbd6d693ca0b32f6ca0cd54a0a8ccf62b8bf230e --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pick.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvpickev.b(<32 x i8>, <32 x i8>) + +define <32 x i8> @lasx_xvpickev_b(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvpickev_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvpickev.b $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvpickev.b(<32 x i8> %va, <32 x i8> %vb) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvpickev.h(<16 x i16>, <16 x i16>) + +define <16 x i16> @lasx_xvpickev_h(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvpickev_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvpickev.h $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> 
@llvm.loongarch.lasx.xvpickev.h(<16 x i16> %va, <16 x i16> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvpickev.w(<8 x i32>, <8 x i32>) + +define <8 x i32> @lasx_xvpickev_w(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvpickev_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvpickev.w $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvpickev.w(<8 x i32> %va, <8 x i32> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvpickev.d(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvpickev_d(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvpickev_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvpickev.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvpickev.d(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} + +declare <32 x i8> @llvm.loongarch.lasx.xvpickod.b(<32 x i8>, <32 x i8>) + +define <32 x i8> @lasx_xvpickod_b(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvpickod_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvpickod.b $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvpickod.b(<32 x i8> %va, <32 x i8> %vb) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvpickod.h(<16 x i16>, <16 x i16>) + +define <16 x i16> @lasx_xvpickod_h(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvpickod_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvpickod.h $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvpickod.h(<16 x i16> %va, <16 x i16> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvpickod.w(<8 x i32>, <8 x i32>) + +define <8 x i32> @lasx_xvpickod_w(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvpickod_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvpickod.w $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvpickod.w(<8 x i32> %va, <8 x i32> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvpickod.d(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvpickod_d(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvpickod_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvpickod.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvpickod.d(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..cfc6ec42874e1ff818e3e18af2a1a24f5462d47c --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve-invalid-imm.ll @@ -0,0 +1,65 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <8 x i32> @llvm.loongarch.lasx.xvpickve.w(<8 x i32>, i32) + +define <8 x i32> @lasx_xvpickve_w_lo(<8 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvpickve.w: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvpickve.w(<8 x i32> %va, i32 -1) + ret <8 x i32> %res +} + +define <8 x i32> @lasx_xvpickve_w_hi(<8 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvpickve.w: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvpickve.w(<8 x i32> %va, i32 8) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvpickve.d(<4 x i64>, i32) + +define <4 x i64> 
@lasx_xvpickve_d_lo(<4 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvpickve.d: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvpickve.d(<4 x i64> %va, i32 -1) + ret <4 x i64> %res +} + +define <4 x i64> @lasx_xvpickve_d_hi(<4 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvpickve.d: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvpickve.d(<4 x i64> %va, i32 4) + ret <4 x i64> %res +} + +declare <8 x float> @llvm.loongarch.lasx.xvpickve.w.f(<8 x float>, i32) + +define <8 x float> @lasx_xvpickve_w_f_lo(<8 x float> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvpickve.w.f: argument out of range +entry: + %res = call <8 x float> @llvm.loongarch.lasx.xvpickve.w.f(<8 x float> %va, i32 -1) + ret <8 x float> %res +} + +define <8 x float> @lasx_xvpickve_w_f_hi(<8 x float> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvpickve.w.f: argument out of range +entry: + %res = call <8 x float> @llvm.loongarch.lasx.xvpickve.w.f(<8 x float> %va, i32 8) + ret <8 x float> %res +} + +declare <4 x double> @llvm.loongarch.lasx.xvpickve.d.f(<4 x double>, i32) + +define <4 x double> @lasx_xvpickve_d_f_lo(<4 x double> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvpickve.d.f: argument out of range +entry: + %res = call <4 x double> @llvm.loongarch.lasx.xvpickve.d.f(<4 x double> %va, i32 -1) + ret <4 x double> %res +} + +define <4 x double> @lasx_xvpickve_d_f_hi(<4 x double> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvpickve.d.f: argument out of range +entry: + %res = call <4 x double> @llvm.loongarch.lasx.xvpickve.d.f(<4 x double> %va, i32 4) + ret <4 x double> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..be1f19a8973709d76ed9eb0a92b44c8914c5f174 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve-non-imm.ll @@ -0,0 +1,37 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <8 x i32> @llvm.loongarch.lasx.xvpickve.w(<8 x i32>, i32) + +define <8 x i32> @lasx_xvpickve_w(<8 x i32> %va, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvpickve.w(<8 x i32> %va, i32 %c) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvpickve.d(<4 x i64>, i32) + +define <4 x i64> @lasx_xvpickve_d(<4 x i64> %va, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvpickve.d(<4 x i64> %va, i32 %c) + ret <4 x i64> %res +} + +declare <8 x float> @llvm.loongarch.lasx.xvpickve.w.f(<8 x float>, i32) + +define <8 x float> @lasx_xvpickve_w_f(<8 x float> %va, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x float> @llvm.loongarch.lasx.xvpickve.w.f(<8 x float> %va, i32 %c) + ret <8 x float> %res +} + +declare <4 x double> @llvm.loongarch.lasx.xvpickve.d.f(<4 x double>, i32) + +define <4 x double> @lasx_xvpickve_d_f(<4 x double> %va, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x double> @llvm.loongarch.lasx.xvpickve.d.f(<4 x double> %va, i32 %c) + ret <4 x double> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve.ll new file mode 100644 index 
0000000000000000000000000000000000000000..546777bc72ab4896893c4273d8fcabcc7b820501 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <8 x i32> @llvm.loongarch.lasx.xvpickve.w(<8 x i32>, i32) + +define <8 x i32> @lasx_xvpickve_w(<8 x i32> %va) nounwind { +; CHECK-LABEL: lasx_xvpickve_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvpickve.w $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvpickve.w(<8 x i32> %va, i32 1) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvpickve.d(<4 x i64>, i32) + +define <4 x i64> @lasx_xvpickve_d(<4 x i64> %va) nounwind { +; CHECK-LABEL: lasx_xvpickve_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvpickve.d $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvpickve.d(<4 x i64> %va, i32 1) + ret <4 x i64> %res +} + +declare <8 x float> @llvm.loongarch.lasx.xvpickve.w.f(<8 x float>, i32) + +define <8 x float> @lasx_xvpickve_w_f(<8 x float> %va) nounwind { +; CHECK-LABEL: lasx_xvpickve_w_f: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvpickve.w $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <8 x float> @llvm.loongarch.lasx.xvpickve.w.f(<8 x float> %va, i32 1) + ret <8 x float> %res +} + +declare <4 x double> @llvm.loongarch.lasx.xvpickve.d.f(<4 x double>, i32) + +define <4 x double> @lasx_xvpickve_d_f(<4 x double> %va) nounwind { +; CHECK-LABEL: lasx_xvpickve_d_f: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvpickve.d $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <4 x double> @llvm.loongarch.lasx.xvpickve.d.f(<4 x double> %va, i32 1) + ret <4 x double> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve2gr-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve2gr-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..93056b272dfc518569766ddb800abfaf16c1d942 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve2gr-invalid-imm.ll @@ -0,0 +1,65 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare i32 @llvm.loongarch.lasx.xvpickve2gr.w(<8 x i32>, i32) + +define i32 @lasx_xvpickve2gr_w_lo(<8 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvpickve2gr.w: argument out of range +entry: + %res = call i32 @llvm.loongarch.lasx.xvpickve2gr.w(<8 x i32> %va, i32 -1) + ret i32 %res +} + +define i32 @lasx_xvpickve2gr_w_hi(<8 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvpickve2gr.w: argument out of range +entry: + %res = call i32 @llvm.loongarch.lasx.xvpickve2gr.w(<8 x i32> %va, i32 8) + ret i32 %res +} + +declare i64 @llvm.loongarch.lasx.xvpickve2gr.d(<4 x i64>, i32) + +define i64 @lasx_xvpickve2gr_d_lo(<4 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvpickve2gr.d: argument out of range +entry: + %res = call i64 @llvm.loongarch.lasx.xvpickve2gr.d(<4 x i64> %va, i32 -1) + ret i64 %res +} + +define i64 @lasx_xvpickve2gr_d_hi(<4 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvpickve2gr.d: argument out of range +entry: + %res = call i64 @llvm.loongarch.lasx.xvpickve2gr.d(<4 x i64> %va, i32 4) + ret i64 %res +} + +declare i32 @llvm.loongarch.lasx.xvpickve2gr.wu(<8 x i32>, i32) + +define i32 @lasx_xvpickve2gr_wu_lo(<8 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvpickve2gr.wu: argument out of range +entry: + %res = call i32 
@llvm.loongarch.lasx.xvpickve2gr.wu(<8 x i32> %va, i32 -1) + ret i32 %res +} + +define i32 @lasx_xvpickve2gr_wu_hi(<8 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvpickve2gr.wu: argument out of range +entry: + %res = call i32 @llvm.loongarch.lasx.xvpickve2gr.wu(<8 x i32> %va, i32 8) + ret i32 %res +} + +declare i64 @llvm.loongarch.lasx.xvpickve2gr.du(<4 x i64>, i32) + +define i64 @lasx_xvpickve2gr_du_lo(<4 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvpickve2gr.du: argument out of range +entry: + %res = call i64 @llvm.loongarch.lasx.xvpickve2gr.du(<4 x i64> %va, i32 -1) + ret i64 %res +} + +define i64 @lasx_xvpickve2gr_du_hi(<4 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvpickve2gr.du: argument out of range +entry: + %res = call i64 @llvm.loongarch.lasx.xvpickve2gr.du(<4 x i64> %va, i32 4) + ret i64 %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve2gr-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve2gr-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..0fa8c94adc60cd714895925f6c576307f03a3bdc --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve2gr-non-imm.ll @@ -0,0 +1,37 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare i32 @llvm.loongarch.lasx.xvpickve2gr.w(<8 x i32>, i32) + +define i32 @lasx_xvpickve2gr_w(<8 x i32> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call i32 @llvm.loongarch.lasx.xvpickve2gr.w(<8 x i32> %va, i32 %b) + ret i32 %res +} + +declare i64 @llvm.loongarch.lasx.xvpickve2gr.d(<4 x i64>, i32) + +define i64 @lasx_xvpickve2gr_d(<4 x i64> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call i64 @llvm.loongarch.lasx.xvpickve2gr.d(<4 x i64> %va, i32 %b) + ret i64 %res +} + +declare i32 @llvm.loongarch.lasx.xvpickve2gr.wu(<8 x i32>, i32) + +define i32 @lasx_xvpickve2gr_wu(<8 x i32> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call i32 @llvm.loongarch.lasx.xvpickve2gr.wu(<8 x i32> %va, i32 %b) + ret i32 %res +} + +declare i64 @llvm.loongarch.lasx.xvpickve2gr.du(<4 x i64>, i32) + +define i64 @lasx_xvpickve2gr_du(<4 x i64> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call i64 @llvm.loongarch.lasx.xvpickve2gr.du(<4 x i64> %va, i32 %b) + ret i64 %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve2gr.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve2gr.ll new file mode 100644 index 0000000000000000000000000000000000000000..0617e7424321bdf435b25ebaf0dc175fbdd073eb --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-pickve2gr.ll @@ -0,0 +1,53 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + + + + +declare i32 @llvm.loongarch.lasx.xvpickve2gr.w(<8 x i32>, i32) + +define i32 @lasx_xvpickve2gr_w(<8 x i32> %va) nounwind { +; CHECK-LABEL: lasx_xvpickve2gr_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call i32 @llvm.loongarch.lasx.xvpickve2gr.w(<8 x i32> %va, i32 1) + ret i32 %res +} + +declare i64 @llvm.loongarch.lasx.xvpickve2gr.d(<4 x i64>, i32) + +define i64 @lasx_xvpickve2gr_d(<4 x i64> %va) nounwind { +; CHECK-LABEL: lasx_xvpickve2gr_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call i64 
@llvm.loongarch.lasx.xvpickve2gr.d(<4 x i64> %va, i32 1) + ret i64 %res +} + +declare i32 @llvm.loongarch.lasx.xvpickve2gr.wu(<8 x i32>, i32) + +define i32 @lasx_xvpickve2gr_wu(<8 x i32> %va) nounwind { +; CHECK-LABEL: lasx_xvpickve2gr_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvpickve2gr.wu $a0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call i32 @llvm.loongarch.lasx.xvpickve2gr.wu(<8 x i32> %va, i32 1) + ret i32 %res +} + +declare i64 @llvm.loongarch.lasx.xvpickve2gr.du(<4 x i64>, i32) + +define i64 @lasx_xvpickve2gr_du(<4 x i64> %va) nounwind { +; CHECK-LABEL: lasx_xvpickve2gr_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvpickve2gr.du $a0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call i64 @llvm.loongarch.lasx.xvpickve2gr.du(<4 x i64> %va, i32 1) + ret i64 %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl128vei-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl128vei-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..a0cb309c54e19fe78faa87792e132fea610d4d25 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl128vei-invalid-imm.ll @@ -0,0 +1,65 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvrepl128vei.b(<32 x i8>, i32) + +define <32 x i8> @lasx_xvrepl128vei_b_lo(<32 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvrepl128vei.b: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvrepl128vei.b(<32 x i8> %va, i32 -1) + ret <32 x i8> %res +} + +define <32 x i8> @lasx_xvrepl128vei_b_hi(<32 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvrepl128vei.b: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvrepl128vei.b(<32 x i8> %va, i32 16) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvrepl128vei.h(<16 x i16>, i32) + +define <16 x i16> @lasx_xvrepl128vei_h_lo(<16 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvrepl128vei.h: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvrepl128vei.h(<16 x i16> %va, i32 -1) + ret <16 x i16> %res +} + +define <16 x i16> @lasx_xvrepl128vei_h_hi(<16 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvrepl128vei.h: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvrepl128vei.h(<16 x i16> %va, i32 8) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvrepl128vei.w(<8 x i32>, i32) + +define <8 x i32> @lasx_xvrepl128vei_w_lo(<8 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvrepl128vei.w: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvrepl128vei.w(<8 x i32> %va, i32 -1) + ret <8 x i32> %res +} + +define <8 x i32> @lasx_xvrepl128vei_w_hi(<8 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvrepl128vei.w: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvrepl128vei.w(<8 x i32> %va, i32 4) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvrepl128vei.d(<4 x i64>, i32) + +define <4 x i64> @lasx_xvrepl128vei_d_lo(<4 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvrepl128vei.d: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvrepl128vei.d(<4 x i64> %va, i32 -1) + ret <4 x i64> %res +} + +define <4 x i64> @lasx_xvrepl128vei_d_hi(<4 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvrepl128vei.d: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvrepl128vei.d(<4 x i64> %va, i32 2) + ret <4 x i64> 
%res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl128vei-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl128vei-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..c537ffa66ba7f5e88fb6562286b7f6990d7f1ccd --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl128vei-non-imm.ll @@ -0,0 +1,37 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvrepl128vei.b(<32 x i8>, i32) + +define <32 x i8> @lasx_xvrepl128vei_b(<32 x i8> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvrepl128vei.b(<32 x i8> %va, i32 %b) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvrepl128vei.h(<16 x i16>, i32) + +define <16 x i16> @lasx_xvrepl128vei_h(<16 x i16> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvrepl128vei.h(<16 x i16> %va, i32 %b) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvrepl128vei.w(<8 x i32>, i32) + +define <8 x i32> @lasx_xvrepl128vei_w(<8 x i32> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvrepl128vei.w(<8 x i32> %va, i32 %b) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvrepl128vei.d(<4 x i64>, i32) + +define <4 x i64> @lasx_xvrepl128vei_d(<4 x i64> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvrepl128vei.d(<4 x i64> %va, i32 %b) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl128vei.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl128vei.ll new file mode 100644 index 0000000000000000000000000000000000000000..25fab44f461f56db1ef4c7bcd3b5045ec4524a85 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl128vei.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvrepl128vei.b(<32 x i8>, i32) + +define <32 x i8> @lasx_xvrepl128vei_b(<32 x i8> %va) nounwind { +; CHECK-LABEL: lasx_xvrepl128vei_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvrepl128vei.b $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvrepl128vei.b(<32 x i8> %va, i32 1) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvrepl128vei.h(<16 x i16>, i32) + +define <16 x i16> @lasx_xvrepl128vei_h(<16 x i16> %va) nounwind { +; CHECK-LABEL: lasx_xvrepl128vei_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvrepl128vei.h $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvrepl128vei.h(<16 x i16> %va, i32 1) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvrepl128vei.w(<8 x i32>, i32) + +define <8 x i32> @lasx_xvrepl128vei_w(<8 x i32> %va) nounwind { +; CHECK-LABEL: lasx_xvrepl128vei_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvrepl128vei.w $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvrepl128vei.w(<8 x i32> %va, i32 1) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvrepl128vei.d(<4 x i64>, i32) + +define <4 x i64> @lasx_xvrepl128vei_d(<4 x i64> %va) nounwind { +; CHECK-LABEL: lasx_xvrepl128vei_d: +; CHECK: # %bb.0: # %entry +; 
+; CHECK-NEXT:    xvrepl128vei.d $xr0, $xr0, 1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvrepl128vei.d(<4 x i64> %va, i32 1)
+  ret <4 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-replgr2vr.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-replgr2vr.ll
new file mode 100644
index 0000000000000000000000000000000000000000..c71abd2205c6712a703c77cac00de76980e88964
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-replgr2vr.ll
@@ -0,0 +1,50 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+declare <32 x i8> @llvm.loongarch.lasx.xvreplgr2vr.b(i32)
+
+define <32 x i8> @lasx_xvreplgr2vr_b(i32 %a) nounwind {
+; CHECK-LABEL: lasx_xvreplgr2vr_b:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvreplgr2vr.b $xr0, $a0
+; CHECK-NEXT:    ret
+entry:
+  %res = call <32 x i8> @llvm.loongarch.lasx.xvreplgr2vr.b(i32 %a)
+  ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvreplgr2vr.h(i32)
+
+define <16 x i16> @lasx_xvreplgr2vr_h(i32 %a) nounwind {
+; CHECK-LABEL: lasx_xvreplgr2vr_h:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvreplgr2vr.h $xr0, $a0
+; CHECK-NEXT:    ret
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvreplgr2vr.h(i32 %a)
+  ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvreplgr2vr.w(i32)
+
+define <8 x i32> @lasx_xvreplgr2vr_w(i32 %a) nounwind {
+; CHECK-LABEL: lasx_xvreplgr2vr_w:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvreplgr2vr.w $xr0, $a0
+; CHECK-NEXT:    ret
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvreplgr2vr.w(i32 %a)
+  ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvreplgr2vr.d(i64)
+
+define <4 x i64> @lasx_xvreplgr2vr_d(i64 %a) nounwind {
+; CHECK-LABEL: lasx_xvreplgr2vr_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvreplgr2vr.d $xr0, $a0
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvreplgr2vr.d(i64 %a)
+  ret <4 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-replve.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-replve.ll
new file mode 100644
index 0000000000000000000000000000000000000000..21d36ff7bb5ee06fb394ab9971dcbd5125d90553
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-replve.ll
@@ -0,0 +1,50 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+declare <32 x i8> @llvm.loongarch.lasx.xvreplve.b(<32 x i8>, i32)
+
+define <32 x i8> @lasx_xvreplve_b(<32 x i8> %va, i32 %b) nounwind {
+; CHECK-LABEL: lasx_xvreplve_b:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvreplve.b $xr0, $xr0, $a0
+; CHECK-NEXT:    ret
+entry:
+  %res = call <32 x i8> @llvm.loongarch.lasx.xvreplve.b(<32 x i8> %va, i32 %b)
+  ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvreplve.h(<16 x i16>, i32)
+
+define <16 x i16> @lasx_xvreplve_h(<16 x i16> %va, i32 %b) nounwind {
+; CHECK-LABEL: lasx_xvreplve_h:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvreplve.h $xr0, $xr0, $a0
+; CHECK-NEXT:    ret
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvreplve.h(<16 x i16> %va, i32 %b)
+  ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvreplve.w(<8 x i32>, i32)
+
+define <8 x i32> @lasx_xvreplve_w(<8 x i32> %va, i32 %b) nounwind {
+; CHECK-LABEL: lasx_xvreplve_w:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvreplve.w $xr0, $xr0, $a0
+; CHECK-NEXT:    ret
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvreplve.w(<8 x i32> %va, i32 %b)
+  ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvreplve.d(<4 x i64>, i32)
+
+define <4 x i64> @lasx_xvreplve_d(<4 x i64> %va, i32 %b) nounwind {
+; CHECK-LABEL: lasx_xvreplve_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvreplve.d $xr0, $xr0, $a0
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvreplve.d(<4 x i64> %va, i32 %b)
+  ret <4 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-replve0.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-replve0.ll
new file mode 100644
index 0000000000000000000000000000000000000000..7996bb36ef03cb91067721c3626d3710da3009f9
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-replve0.ll
@@ -0,0 +1,62 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+declare <32 x i8> @llvm.loongarch.lasx.xvreplve0.b(<32 x i8>)
+
+define <32 x i8> @lasx_xvreplve0_b(<32 x i8> %va) nounwind {
+; CHECK-LABEL: lasx_xvreplve0_b:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvreplve0.b $xr0, $xr0
+; CHECK-NEXT:    ret
+entry:
+  %res = call <32 x i8> @llvm.loongarch.lasx.xvreplve0.b(<32 x i8> %va)
+  ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvreplve0.h(<16 x i16>)
+
+define <16 x i16> @lasx_xvreplve0_h(<16 x i16> %va) nounwind {
+; CHECK-LABEL: lasx_xvreplve0_h:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvreplve0.h $xr0, $xr0
+; CHECK-NEXT:    ret
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvreplve0.h(<16 x i16> %va)
+  ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvreplve0.w(<8 x i32>)
+
+define <8 x i32> @lasx_xvreplve0_w(<8 x i32> %va) nounwind {
+; CHECK-LABEL: lasx_xvreplve0_w:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvreplve0.w $xr0, $xr0
+; CHECK-NEXT:    ret
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvreplve0.w(<8 x i32> %va)
+  ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvreplve0.d(<4 x i64>)
+
+define <4 x i64> @lasx_xvreplve0_d(<4 x i64> %va) nounwind {
+; CHECK-LABEL: lasx_xvreplve0_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvreplve0.d $xr0, $xr0
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvreplve0.d(<4 x i64> %va)
+  ret <4 x i64> %res
+}
+
+declare <32 x i8> @llvm.loongarch.lasx.xvreplve0.q(<32 x i8>)
+
+define <32 x i8> @lasx_xvreplve0_q(<32 x i8> %va) nounwind {
+; CHECK-LABEL: lasx_xvreplve0_q:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvreplve0.q $xr0, $xr0
+; CHECK-NEXT:    ret
+entry:
+  %res = call <32 x i8> @llvm.loongarch.lasx.xvreplve0.q(<32 x i8> %va)
+  ret <32 x i8> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-rotr-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-rotr-invalid-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..40abdf49760500a7aa4de23459a02f4c5f9ce9e9
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-rotr-invalid-imm.ll
@@ -0,0 +1,65 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s
+
+declare <32 x i8> @llvm.loongarch.lasx.xvrotri.b(<32 x i8>, i32)
+
+define <32 x i8> @lasx_xvrotri_b_lo(<32 x i8> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvrotri.b: argument out of range
+entry:
+  %res = call <32 x i8> @llvm.loongarch.lasx.xvrotri.b(<32 x i8> %va, i32 -1)
+  ret <32 x i8> %res
+}
+
+define <32 x i8> @lasx_xvrotri_b_hi(<32 x i8> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvrotri.b: argument out of range
+entry:
+  %res = call <32 x i8> @llvm.loongarch.lasx.xvrotri.b(<32 x i8> %va, i32 8)
+  ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvrotri.h(<16 x i16>, i32)
+
+define <16 x i16> @lasx_xvrotri_h_lo(<16 x i16> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvrotri.h: argument out of range
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvrotri.h(<16 x i16> %va, i32 -1)
+  ret <16 x i16> %res
+}
+
+define <16 x i16> @lasx_xvrotri_h_hi(<16 x i16> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvrotri.h: argument out of range
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvrotri.h(<16 x i16> %va, i32 16)
+  ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvrotri.w(<8 x i32>, i32)
+
+define <8 x i32> @lasx_xvrotri_w_lo(<8 x i32> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvrotri.w: argument out of range
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvrotri.w(<8 x i32> %va, i32 -1)
+  ret <8 x i32> %res
+}
+
+define <8 x i32> @lasx_xvrotri_w_hi(<8 x i32> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvrotri.w: argument out of range
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvrotri.w(<8 x i32> %va, i32 32)
+  ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvrotri.d(<4 x i64>, i32)
+
+define <4 x i64> @lasx_xvrotri_d_lo(<4 x i64> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvrotri.d: argument out of range
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvrotri.d(<4 x i64> %va, i32 -1)
+  ret <4 x i64> %res
+}
+
+define <4 x i64> @lasx_xvrotri_d_hi(<4 x i64> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvrotri.d: argument out of range
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvrotri.d(<4 x i64> %va, i32 64)
+  ret <4 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-rotr-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-rotr-non-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..dd38301d0534560ea7bfccecb993d35f2f14aa1c
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-rotr-non-imm.ll
@@ -0,0 +1,37 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s
+
+declare <32 x i8> @llvm.loongarch.lasx.xvrotri.b(<32 x i8>, i32)
+
+define <32 x i8> @lasx_xvrotri_b(<32 x i8> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <32 x i8> @llvm.loongarch.lasx.xvrotri.b(<32 x i8> %va, i32 %b)
+  ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvrotri.h(<16 x i16>, i32)
+
+define <16 x i16> @lasx_xvrotri_h(<16 x i16> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvrotri.h(<16 x i16> %va, i32 %b)
+  ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvrotri.w(<8 x i32>, i32)
+
+define <8 x i32> @lasx_xvrotri_w(<8 x i32> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvrotri.w(<8 x i32> %va, i32 %b)
+  ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvrotri.d(<4 x i64>, i32)
+
+define <4 x i64> @lasx_xvrotri_d(<4 x i64> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvrotri.d(<4 x i64> %va, i32 %b)
+  ret <4 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-rotr.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-rotr.ll
new file mode 100644
index 0000000000000000000000000000000000000000..64d2773864e9f016243b921c968f12f17ae380db
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-rotr.ll
@@ -0,0 +1,98 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+declare <32 x i8> @llvm.loongarch.lasx.xvrotr.b(<32 x i8>, <32 x i8>)
+
+define <32 x i8> @lasx_xvrotr_b(<32 x i8> %va, <32 x i8> %vb) nounwind {
+; CHECK-LABEL: lasx_xvrotr_b:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvrotr.b $xr0, $xr0, $xr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <32 x i8> @llvm.loongarch.lasx.xvrotr.b(<32 x i8> %va, <32 x i8> %vb)
+  ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvrotr.h(<16 x i16>, <16 x i16>)
+
+define <16 x i16> @lasx_xvrotr_h(<16 x i16> %va, <16 x i16> %vb) nounwind {
+; CHECK-LABEL: lasx_xvrotr_h:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvrotr.h $xr0, $xr0, $xr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvrotr.h(<16 x i16> %va, <16 x i16> %vb)
+  ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvrotr.w(<8 x i32>, <8 x i32>)
+
+define <8 x i32> @lasx_xvrotr_w(<8 x i32> %va, <8 x i32> %vb) nounwind {
+; CHECK-LABEL: lasx_xvrotr_w:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvrotr.w $xr0, $xr0, $xr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvrotr.w(<8 x i32> %va, <8 x i32> %vb)
+  ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvrotr.d(<4 x i64>, <4 x i64>)
+
+define <4 x i64> @lasx_xvrotr_d(<4 x i64> %va, <4 x i64> %vb) nounwind {
+; CHECK-LABEL: lasx_xvrotr_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvrotr.d $xr0, $xr0, $xr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvrotr.d(<4 x i64> %va, <4 x i64> %vb)
+  ret <4 x i64> %res
+}
+
+declare <32 x i8> @llvm.loongarch.lasx.xvrotri.b(<32 x i8>, i32)
+
+define <32 x i8> @lasx_xvrotri_b(<32 x i8> %va) nounwind {
+; CHECK-LABEL: lasx_xvrotri_b:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvrotri.b $xr0, $xr0, 1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <32 x i8> @llvm.loongarch.lasx.xvrotri.b(<32 x i8> %va, i32 1)
+  ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvrotri.h(<16 x i16>, i32)
+
+define <16 x i16> @lasx_xvrotri_h(<16 x i16> %va) nounwind {
+; CHECK-LABEL: lasx_xvrotri_h:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvrotri.h $xr0, $xr0, 1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvrotri.h(<16 x i16> %va, i32 1)
+  ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvrotri.w(<8 x i32>, i32)
+
+define <8 x i32> @lasx_xvrotri_w(<8 x i32> %va) nounwind {
+; CHECK-LABEL: lasx_xvrotri_w:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvrotri.w $xr0, $xr0, 1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvrotri.w(<8 x i32> %va, i32 1)
+  ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvrotri.d(<4 x i64>, i32)
+
+define <4 x i64> @lasx_xvrotri_d(<4 x i64> %va) nounwind {
+; CHECK-LABEL: lasx_xvrotri_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvrotri.d $xr0, $xr0, 1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvrotri.d(<4 x i64> %va, i32 1)
+  ret <4 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sadd.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sadd.ll
new file mode 100644
index 0000000000000000000000000000000000000000..54a5e2e9c8332d3c955d1f044245e2172cecaec4
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sadd.ll
@@ -0,0 +1,98 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+declare <32 x i8> @llvm.loongarch.lasx.xvsadd.b(<32 x i8>, <32 x i8>)
+
+define <32 x i8> @lasx_xvsadd_b(<32 x i8> %va, <32 x i8> %vb) nounwind {
+; CHECK-LABEL: lasx_xvsadd_b:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvsadd.b $xr0, $xr0, $xr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <32 x i8> @llvm.loongarch.lasx.xvsadd.b(<32 x i8> %va, <32 x i8> %vb)
+  ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvsadd.h(<16 x i16>, <16 x i16>)
+
+define <16 x i16> @lasx_xvsadd_h(<16 x i16> %va, <16 x i16> %vb) nounwind {
+; CHECK-LABEL: lasx_xvsadd_h:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvsadd.h $xr0, $xr0, $xr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvsadd.h(<16 x i16> %va, <16 x i16> %vb)
+  ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvsadd.w(<8 x i32>, <8 x i32>)
+
+define <8 x i32> @lasx_xvsadd_w(<8 x i32> %va, <8 x i32> %vb) nounwind {
+; CHECK-LABEL: lasx_xvsadd_w:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvsadd.w $xr0, $xr0, $xr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvsadd.w(<8 x i32> %va, <8 x i32> %vb)
+  ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvsadd.d(<4 x i64>, <4 x i64>)
+
+define <4 x i64> @lasx_xvsadd_d(<4 x i64> %va, <4 x i64> %vb) nounwind {
+; CHECK-LABEL: lasx_xvsadd_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvsadd.d $xr0, $xr0, $xr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvsadd.d(<4 x i64> %va, <4 x i64> %vb)
+  ret <4 x i64> %res
+}
+
+declare <32 x i8> @llvm.loongarch.lasx.xvsadd.bu(<32 x i8>, <32 x i8>)
+
+define <32 x i8> @lasx_xvsadd_bu(<32 x i8> %va, <32 x i8> %vb) nounwind {
+; CHECK-LABEL: lasx_xvsadd_bu:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvsadd.bu $xr0, $xr0, $xr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <32 x i8> @llvm.loongarch.lasx.xvsadd.bu(<32 x i8> %va, <32 x i8> %vb)
+  ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvsadd.hu(<16 x i16>, <16 x i16>)
+
+define <16 x i16> @lasx_xvsadd_hu(<16 x i16> %va, <16 x i16> %vb) nounwind {
+; CHECK-LABEL: lasx_xvsadd_hu:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvsadd.hu $xr0, $xr0, $xr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvsadd.hu(<16 x i16> %va, <16 x i16> %vb)
+  ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvsadd.wu(<8 x i32>, <8 x i32>)
+
+define <8 x i32> @lasx_xvsadd_wu(<8 x i32> %va, <8 x i32> %vb) nounwind {
+; CHECK-LABEL: lasx_xvsadd_wu:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvsadd.wu $xr0, $xr0, $xr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvsadd.wu(<8 x i32> %va, <8 x i32> %vb)
+  ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvsadd.du(<4 x i64>, <4 x i64>)
+
+define <4 x i64> @lasx_xvsadd_du(<4 x i64> %va, <4 x i64> %vb) nounwind {
+; CHECK-LABEL: lasx_xvsadd_du:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvsadd.du $xr0, $xr0, $xr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvsadd.du(<4 x i64> %va, <4 x i64> %vb)
+  ret <4 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sat-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sat-invalid-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..839fbc9990d34c09fad08aad7ff47fa153882eec
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sat-invalid-imm.ll
@@ -0,0 +1,129 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s
+
+declare <32 x i8> @llvm.loongarch.lasx.xvsat.b(<32 x i8>, i32)
+
+define <32 x i8> @lasx_xvsat_b_lo(<32 x i8> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvsat.b: argument out of range
+entry:
+  %res = call <32 x i8> @llvm.loongarch.lasx.xvsat.b(<32 x i8> %va, i32 -1)
+  ret <32 x i8> %res
+}
+
+define <32 x i8> @lasx_xvsat_b_hi(<32 x i8> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvsat.b: argument out of range
+entry:
+  %res = call <32 x i8> @llvm.loongarch.lasx.xvsat.b(<32 x i8> %va, i32 8)
+  ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvsat.h(<16 x i16>, i32)
+
+define <16 x i16> @lasx_xvsat_h_lo(<16 x i16> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvsat.h: argument out of range
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvsat.h(<16 x i16> %va, i32 -1)
+  ret <16 x i16> %res
+}
+
+define <16 x i16> @lasx_xvsat_h_hi(<16 x i16> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvsat.h: argument out of range
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvsat.h(<16 x i16> %va, i32 16)
+  ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvsat.w(<8 x i32>, i32)
+
+define <8 x i32> @lasx_xvsat_w_lo(<8 x i32> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvsat.w: argument out of range
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvsat.w(<8 x i32> %va, i32 -1)
+  ret <8 x i32> %res
+}
+
+define <8 x i32> @lasx_xvsat_w_hi(<8 x i32> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvsat.w: argument out of range
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvsat.w(<8 x i32> %va, i32 32)
+  ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvsat.d(<4 x i64>, i32)
+
+define <4 x i64> @lasx_xvsat_d_lo(<4 x i64> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvsat.d: argument out of range
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvsat.d(<4 x i64> %va, i32 -1)
+  ret <4 x i64> %res
+}
+
+define <4 x i64> @lasx_xvsat_d_hi(<4 x i64> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvsat.d: argument out of range
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvsat.d(<4 x i64> %va, i32 64)
+  ret <4 x i64> %res
+}
+
+declare <32 x i8> @llvm.loongarch.lasx.xvsat.bu(<32 x i8>, i32)
+
+define <32 x i8> @lasx_xvsat_bu_lo(<32 x i8> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvsat.bu: argument out of range
+entry:
+  %res = call <32 x i8> @llvm.loongarch.lasx.xvsat.bu(<32 x i8> %va, i32 -1)
+  ret <32 x i8> %res
+}
+
+define <32 x i8> @lasx_xvsat_bu_hi(<32 x i8> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvsat.bu: argument out of range
+entry:
+  %res = call <32 x i8> @llvm.loongarch.lasx.xvsat.bu(<32 x i8> %va, i32 8)
+  ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvsat.hu(<16 x i16>, i32)
+
+define <16 x i16> @lasx_xvsat_hu_lo(<16 x i16> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvsat.hu: argument out of range
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvsat.hu(<16 x i16> %va, i32 -1)
+  ret <16 x i16> %res
+}
+
+define <16 x i16> @lasx_xvsat_hu_hi(<16 x i16> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvsat.hu: argument out of range
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvsat.hu(<16 x i16> %va, i32 16)
+  ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvsat.wu(<8 x i32>, i32)
+
+define <8 x i32> @lasx_xvsat_wu_lo(<8 x i32> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvsat.wu: argument out of range
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvsat.wu(<8 x i32> %va, i32 -1)
+  ret <8 x i32> %res
+}
+
+define <8 x i32> @lasx_xvsat_wu_hi(<8 x i32> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvsat.wu: argument out of range
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvsat.wu(<8 x i32> %va, i32 32)
+  ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvsat.du(<4 x i64>, i32)
+
+define <4 x i64> @lasx_xvsat_du_lo(<4 x i64> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvsat.du: argument out of range
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvsat.du(<4 x i64> %va, i32 -1)
+  ret <4 x i64> %res
+}
+
+define <4 x i64> @lasx_xvsat_du_hi(<4 x i64> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvsat.du: argument out of range
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvsat.du(<4 x i64> %va, i32 64)
+  ret <4 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sat-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sat-non-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..b73b32ebd3b021ff5781ff392a240ee6f7c86b42
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sat-non-imm.ll
@@ -0,0 +1,73 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s
+
+declare <32 x i8> @llvm.loongarch.lasx.xvsat.b(<32 x i8>, i32)
+
+define <32 x i8> @lasx_xvsat_b(<32 x i8> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <32 x i8> @llvm.loongarch.lasx.xvsat.b(<32 x i8> %va, i32 %b)
+  ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvsat.h(<16 x i16>, i32)
+
+define <16 x i16> @lasx_xvsat_h(<16 x i16> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvsat.h(<16 x i16> %va, i32 %b)
+  ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvsat.w(<8 x i32>, i32)
+
+define <8 x i32> @lasx_xvsat_w(<8 x i32> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvsat.w(<8 x i32> %va, i32 %b)
+  ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvsat.d(<4 x i64>, i32)
+
+define <4 x i64> @lasx_xvsat_d(<4 x i64> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvsat.d(<4 x i64> %va, i32 %b)
+  ret <4 x i64> %res
+}
+
+declare <32 x i8> @llvm.loongarch.lasx.xvsat.bu(<32 x i8>, i32)
+
+define <32 x i8> @lasx_xvsat_bu(<32 x i8> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <32 x i8> @llvm.loongarch.lasx.xvsat.bu(<32 x i8> %va, i32 %b)
+  ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvsat.hu(<16 x i16>, i32)
+
+define <16 x i16> @lasx_xvsat_hu(<16 x i16> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvsat.hu(<16 x i16> %va, i32 %b)
+  ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvsat.wu(<8 x i32>, i32)
+
+define <8 x i32> @lasx_xvsat_wu(<8 x i32> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvsat.wu(<8 x i32> %va, i32 %b)
+  ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvsat.du(<4 x i64>, i32)
+
+define <4 x i64> @lasx_xvsat_du(<4 x i64> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvsat.du(<4 x i64> %va, i32 %b)
+  ret <4 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sat.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sat.ll
new file mode 100644
index 0000000000000000000000000000000000000000..293b9dc9eb4d9f309711b2bb65ce0429ca7d4547
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sat.ll
@@ -0,0 +1,98 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+declare <32 x i8> @llvm.loongarch.lasx.xvsat.b(<32 x i8>, i32)
+
+define <32 x i8> @lasx_xvsat_b(<32 x i8> %va) nounwind {
+; CHECK-LABEL: lasx_xvsat_b:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvsat.b $xr0, $xr0, 1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <32 x i8> @llvm.loongarch.lasx.xvsat.b(<32 x i8> %va, i32 1)
+  ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvsat.h(<16 x i16>, i32)
+
+define <16 x i16> @lasx_xvsat_h(<16 x i16> %va) nounwind {
+; CHECK-LABEL: lasx_xvsat_h:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvsat.h $xr0, $xr0, 1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvsat.h(<16 x i16> %va, i32 1)
+  ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvsat.w(<8 x i32>, i32)
+
+define <8 x i32> @lasx_xvsat_w(<8 x i32> %va) nounwind {
+; CHECK-LABEL: lasx_xvsat_w:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvsat.w $xr0, $xr0, 1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvsat.w(<8 x i32> %va, i32 1)
+  ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvsat.d(<4 x i64>, i32)
+
+define <4 x i64> @lasx_xvsat_d(<4 x i64> %va) nounwind {
+; CHECK-LABEL: lasx_xvsat_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvsat.d $xr0, $xr0, 1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvsat.d(<4 x i64> %va, i32 1)
+  ret <4 x i64> %res
+}
+
+declare <32 x i8> @llvm.loongarch.lasx.xvsat.bu(<32 x i8>, i32)
+
+define <32 x i8> @lasx_xvsat_bu(<32 x i8> %va) nounwind {
+; CHECK-LABEL: lasx_xvsat_bu:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvsat.bu $xr0, $xr0, 1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <32 x i8> @llvm.loongarch.lasx.xvsat.bu(<32 x i8> %va, i32 1)
+  ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvsat.hu(<16 x i16>, i32)
+
+define <16 x i16> @lasx_xvsat_hu(<16 x i16> %va) nounwind {
+; CHECK-LABEL: lasx_xvsat_hu:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvsat.hu $xr0, $xr0, 1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvsat.hu(<16 x i16> %va, i32 1)
+  ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvsat.wu(<8 x i32>, i32)
+
+define <8 x i32> @lasx_xvsat_wu(<8 x i32> %va) nounwind {
+; CHECK-LABEL: lasx_xvsat_wu:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvsat.wu $xr0, $xr0, 1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvsat.wu(<8 x i32> %va, i32 1)
+  ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvsat.du(<4 x i64>, i32)
+
+define <4 x i64> @lasx_xvsat_du(<4 x i64> %va) nounwind {
+; CHECK-LABEL: lasx_xvsat_du:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvsat.du $xr0, $xr0, 1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvsat.du(<4 x i64> %va, i32 1)
+  ret <4 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-seq-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-seq-invalid-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..bb6ef0cc6574cce8f25c0124781a9f792fdaf7bf
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-seq-invalid-imm.ll
@@ -0,0 +1,65 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s
+
+declare <32 x i8> @llvm.loongarch.lasx.xvseqi.b(<32 x i8>, i32)
+
+define <32 x i8> @lasx_xvseqi_b_lo(<32 x i8> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvseqi.b: argument out of range
+entry:
+  %res = call <32 x i8> @llvm.loongarch.lasx.xvseqi.b(<32 x i8> %va, i32 -17)
+  ret <32 x i8> %res
+}
+
+define <32 x i8> @lasx_xvseqi_b_hi(<32 x i8> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvseqi.b: argument out of range
+entry:
+  %res = call <32 x i8> @llvm.loongarch.lasx.xvseqi.b(<32 x i8> %va, i32 16)
+  ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvseqi.h(<16 x i16>, i32)
+
+define <16 x i16> @lasx_xvseqi_h_lo(<16 x i16> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvseqi.h: argument out of range
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvseqi.h(<16 x i16> %va, i32 -17)
+  ret <16 x i16> %res
+}
+
+define <16 x i16> @lasx_xvseqi_h_hi(<16 x i16> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvseqi.h: argument out of range
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvseqi.h(<16 x i16> %va, i32 16)
+  ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvseqi.w(<8 x i32>, i32)
+
+define <8 x i32> @lasx_xvseqi_w_lo(<8 x i32> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvseqi.w: argument out of range
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvseqi.w(<8 x i32> %va, i32 -17)
+  ret <8 x i32> %res
+}
+
+define <8 x i32> @lasx_xvseqi_w_hi(<8 x i32> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvseqi.w: argument out of range
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvseqi.w(<8 x i32> %va, i32 16)
+  ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvseqi.d(<4 x i64>, i32)
+
+define <4 x i64> @lasx_xvseqi_d_lo(<4 x i64> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvseqi.d: argument out of range
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvseqi.d(<4 x i64> %va, i32 -17)
+  ret <4 x i64> %res
+}
+
+define <4 x i64> @lasx_xvseqi_d_hi(<4 x i64> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvseqi.d: argument out of range
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvseqi.d(<4 x i64> %va, i32 16)
+  ret <4 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-seq-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-seq-non-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..fb2c6206da7b9531958bf1d27cead752cc83b58f
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-seq-non-imm.ll
@@ -0,0 +1,37 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s
+
+declare <32 x i8> @llvm.loongarch.lasx.xvseqi.b(<32 x i8>, i32)
+
+define <32 x i8> @lasx_xvseqi_b(<32 x i8> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <32 x i8> @llvm.loongarch.lasx.xvseqi.b(<32 x i8> %va, i32 %b)
+  ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvseqi.h(<16 x i16>, i32)
+
+define <16 x i16> @lasx_xvseqi_h(<16 x i16> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvseqi.h(<16 x i16> %va, i32 %b)
+  ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvseqi.w(<8 x i32>, i32)
+
+define <8 x i32> @lasx_xvseqi_w(<8 x i32> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvseqi.w(<8 x i32> %va, i32 %b)
+  ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvseqi.d(<4 x i64>, i32)
+
+define <4 x i64> @lasx_xvseqi_d(<4 x i64> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvseqi.d(<4 x i64> %va, i32 %b)
+  ret <4 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-seq.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-seq.ll
new file mode 100644
index 0000000000000000000000000000000000000000..83bc93c88c73c3bd26fc381e3324a6454b877731
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-seq.ll
@@ -0,0 +1,98 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+declare <32 x i8> @llvm.loongarch.lasx.xvseq.b(<32 x i8>, <32 x i8>)
+
+define <32 x i8> @lasx_xvseq_b(<32 x i8> %va, <32 x i8> %vb) nounwind {
+; CHECK-LABEL: lasx_xvseq_b:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvseq.b $xr0, $xr0, $xr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <32 x i8> @llvm.loongarch.lasx.xvseq.b(<32 x i8> %va, <32 x i8> %vb)
+  ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvseq.h(<16 x i16>, <16 x i16>)
+
+define <16 x i16> @lasx_xvseq_h(<16 x i16> %va, <16 x i16> %vb) nounwind {
+; CHECK-LABEL: lasx_xvseq_h:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvseq.h $xr0, $xr0, $xr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvseq.h(<16 x i16> %va, <16 x i16> %vb)
+  ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvseq.w(<8 x i32>, <8 x i32>)
+
+define <8 x i32> @lasx_xvseq_w(<8 x i32> %va, <8 x i32> %vb) nounwind {
+; CHECK-LABEL: lasx_xvseq_w:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvseq.w $xr0, $xr0, $xr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvseq.w(<8 x i32> %va, <8 x i32> %vb)
+  ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvseq.d(<4 x i64>, <4 x i64>)
+
+define <4 x i64> @lasx_xvseq_d(<4 x i64> %va, <4 x i64> %vb) nounwind {
+; CHECK-LABEL: lasx_xvseq_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvseq.d $xr0, $xr0, $xr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvseq.d(<4 x i64> %va, <4 x i64> %vb)
+  ret <4 x i64> %res
+}
+
+declare <32 x i8> @llvm.loongarch.lasx.xvseqi.b(<32 x i8>, i32)
+
+define <32 x i8> @lasx_xvseqi_b(<32 x i8> %va) nounwind {
+; CHECK-LABEL: lasx_xvseqi_b:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvseqi.b $xr0, $xr0, 1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <32 x i8> @llvm.loongarch.lasx.xvseqi.b(<32 x i8> %va, i32 1)
+  ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvseqi.h(<16 x i16>, i32)
+
+define <16 x i16> @lasx_xvseqi_h(<16 x i16> %va) nounwind {
+; CHECK-LABEL: lasx_xvseqi_h:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvseqi.h $xr0, $xr0, 1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvseqi.h(<16 x i16> %va, i32 1)
+  ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvseqi.w(<8 x i32>, i32)
+
+define <8 x i32> @lasx_xvseqi_w(<8 x i32> %va) nounwind {
+; CHECK-LABEL: lasx_xvseqi_w:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvseqi.w $xr0, $xr0, 1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvseqi.w(<8 x i32> %va, i32 1)
+  ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvseqi.d(<4 x i64>, i32)
+
+define <4 x i64> @lasx_xvseqi_d(<4 x i64> %va) nounwind {
+; CHECK-LABEL: lasx_xvseqi_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvseqi.d $xr0, $xr0, 1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvseqi.d(<4 x i64> %va, i32 1)
+  ret <4 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-set.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-set.ll
new file mode 100644
index 0000000000000000000000000000000000000000..6e3e2e0330f525a9566e63cf20dd423ea5fa4146
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-set.ll
@@ -0,0 +1,38 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+declare i32 @llvm.loongarch.lasx.xbz.v(<32 x i8>)
+
+define i32 @lasx_xbz_v(<32 x i8> %va) nounwind {
+; CHECK-LABEL: lasx_xbz_v:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvseteqz.v $fcc0, $xr0
+; CHECK-NEXT:    bcnez $fcc0, .LBB0_2
+; CHECK-NEXT:  # %bb.1: # %entry
+; CHECK-NEXT:    addi.w $a0, $zero, 0
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB0_2: # %entry
+; CHECK-NEXT:    addi.w $a0, $zero, 1
+; CHECK-NEXT:    ret
+entry:
+  %res = call i32 @llvm.loongarch.lasx.xbz.v(<32 x i8> %va)
+  ret i32 %res
+}
+
+declare i32 @llvm.loongarch.lasx.xbnz.v(<32 x i8>)
+
+define i32 @lasx_xbnz_v(<32 x i8> %va) nounwind {
+; CHECK-LABEL: lasx_xbnz_v:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvsetnez.v $fcc0, $xr0
+; CHECK-NEXT:    bcnez $fcc0, .LBB1_2
+; CHECK-NEXT:  # %bb.1: # %entry
+; CHECK-NEXT:    addi.w $a0, $zero, 0
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB1_2: # %entry
+; CHECK-NEXT:    addi.w $a0, $zero, 1
+; CHECK-NEXT:    ret
+entry:
+  %res = call i32 @llvm.loongarch.lasx.xbnz.v(<32 x i8> %va)
+  ret i32 %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-setallnez.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-setallnez.ll
new file mode 100644
index 0000000000000000000000000000000000000000..a466b78bf8d2d0af2e607fae4ee3fac1ac851873
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-setallnez.ll
@@ -0,0 +1,74 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+declare i32 @llvm.loongarch.lasx.xbnz.b(<32 x i8>)
+
+define i32 @lasx_xbnz_b(<32 x i8> %va) nounwind {
+; CHECK-LABEL: lasx_xbnz_b:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvsetallnez.b $fcc0, $xr0
+; CHECK-NEXT:    bcnez $fcc0, .LBB0_2
+; CHECK-NEXT:  # %bb.1: # %entry
+; CHECK-NEXT:    addi.w $a0, $zero, 0
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB0_2: # %entry
+; CHECK-NEXT:    addi.w $a0, $zero, 1
+; CHECK-NEXT:    ret
+entry:
+  %res = call i32 @llvm.loongarch.lasx.xbnz.b(<32 x i8> %va)
+  ret i32 %res
+}
+
+declare i32 @llvm.loongarch.lasx.xbnz.h(<16 x i16>)
+
+define i32 @lasx_xbnz_h(<16 x i16> %va) nounwind {
+; CHECK-LABEL: lasx_xbnz_h:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvsetallnez.h $fcc0, $xr0
+; CHECK-NEXT:    bcnez $fcc0, .LBB1_2
+; CHECK-NEXT:  # %bb.1: # %entry
+; CHECK-NEXT:    addi.w $a0, $zero, 0
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB1_2: # %entry
+; CHECK-NEXT:    addi.w $a0, $zero, 1
+; CHECK-NEXT:    ret
+entry:
+  %res = call i32 @llvm.loongarch.lasx.xbnz.h(<16 x i16> %va)
+  ret i32 %res
+}
+
+declare i32 @llvm.loongarch.lasx.xbnz.w(<8 x i32>)
+
+define i32 @lasx_xbnz_w(<8 x i32> %va) nounwind {
+; CHECK-LABEL: lasx_xbnz_w:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvsetallnez.w $fcc0, $xr0
+; CHECK-NEXT:    bcnez $fcc0, .LBB2_2
+; CHECK-NEXT:  # %bb.1: # %entry
+; CHECK-NEXT:    addi.w $a0, $zero, 0
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB2_2: # %entry
+; CHECK-NEXT:    addi.w $a0, $zero, 1
+; CHECK-NEXT:    ret
+entry:
+  %res = call i32 @llvm.loongarch.lasx.xbnz.w(<8 x i32> %va)
+  ret i32 %res
+}
+
+declare i32 @llvm.loongarch.lasx.xbnz.d(<4 x i64>)
+
+define i32 @lasx_xbnz_d(<4 x i64> %va) nounwind {
+; CHECK-LABEL: lasx_xbnz_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvsetallnez.d $fcc0, $xr0
+; CHECK-NEXT:    bcnez $fcc0, .LBB3_2
+; CHECK-NEXT:  # %bb.1: # %entry
+; CHECK-NEXT:    addi.w $a0, $zero, 0
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB3_2: # %entry
+; CHECK-NEXT:    addi.w $a0, $zero, 1
+; CHECK-NEXT:    ret
+entry:
+  %res = call i32 @llvm.loongarch.lasx.xbnz.d(<4 x i64> %va)
+  ret i32 %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-setanyeqz.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-setanyeqz.ll
new file mode 100644
index 0000000000000000000000000000000000000000..36e65fc5b32811fa17f0f1629695800c07aded55
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-setanyeqz.ll
@@ -0,0 +1,74 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+declare i32 @llvm.loongarch.lasx.xbz.b(<32 x i8>)
+
+define i32 @lasx_xbz_b(<32 x i8> %va) nounwind {
+; CHECK-LABEL: lasx_xbz_b:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvsetanyeqz.b $fcc0, $xr0
+; CHECK-NEXT:    bcnez $fcc0, .LBB0_2
+; CHECK-NEXT:  # %bb.1: # %entry
+; CHECK-NEXT:    addi.w $a0, $zero, 0
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB0_2: # %entry
+; CHECK-NEXT:    addi.w $a0, $zero, 1
+; CHECK-NEXT:    ret
+entry:
+  %res = call i32 @llvm.loongarch.lasx.xbz.b(<32 x i8> %va)
+  ret i32 %res
+}
+
+declare i32 @llvm.loongarch.lasx.xbz.h(<16 x i16>)
+
+define i32 @lasx_xbz_h(<16 x i16> %va) nounwind {
+; CHECK-LABEL: lasx_xbz_h:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvsetanyeqz.h $fcc0, $xr0
+; CHECK-NEXT:    bcnez $fcc0, .LBB1_2
+; CHECK-NEXT:  # %bb.1: # %entry
+; CHECK-NEXT:    addi.w $a0, $zero, 0
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB1_2: # %entry
+; CHECK-NEXT:    addi.w $a0, $zero, 1
+; CHECK-NEXT:    ret
+entry:
+  %res = call i32 @llvm.loongarch.lasx.xbz.h(<16 x i16> %va)
+  ret i32 %res
+}
+
+declare i32 @llvm.loongarch.lasx.xbz.w(<8 x i32>)
+
+define i32 @lasx_xbz_w(<8 x i32> %va) nounwind {
+; CHECK-LABEL: lasx_xbz_w:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvsetanyeqz.w $fcc0, $xr0
+; CHECK-NEXT:    bcnez $fcc0, .LBB2_2
+; CHECK-NEXT:  # %bb.1: # %entry
+; CHECK-NEXT:    addi.w $a0, $zero, 0
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB2_2: # %entry
+; CHECK-NEXT:    addi.w $a0, $zero, 1
+; CHECK-NEXT:    ret
+entry:
+  %res = call i32 @llvm.loongarch.lasx.xbz.w(<8 x i32> %va)
+  ret i32 %res
+}
+
+declare i32 @llvm.loongarch.lasx.xbz.d(<4 x i64>)
+
+define i32 @lasx_xbz_d(<4 x i64> %va) nounwind {
+; CHECK-LABEL: lasx_xbz_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvsetanyeqz.d $fcc0, $xr0
+; CHECK-NEXT:    bcnez $fcc0, .LBB3_2
+; CHECK-NEXT:  # %bb.1: # %entry
+; CHECK-NEXT:    addi.w $a0, $zero, 0
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB3_2: # %entry
+; CHECK-NEXT:    addi.w $a0, $zero, 1
+; CHECK-NEXT:    ret
+entry:
+  %res = call i32 @llvm.loongarch.lasx.xbz.d(<4 x i64> %va)
+  ret i32 %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf.ll
new file mode 100644
index 0000000000000000000000000000000000000000..9b9140f6ad62178b4255e67b1d3b44502b16d8b9
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf.ll
@@ -0,0 +1,50 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+declare <32 x i8> @llvm.loongarch.lasx.xvshuf.b(<32 x i8>, <32 x i8>, <32 x i8>)
+
+define <32 x i8> @lasx_xvshuf_b(<32 x i8> %va, <32 x i8> %vb, <32 x i8> %vc) nounwind {
+; CHECK-LABEL: lasx_xvshuf_b:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvshuf.b $xr0, $xr0, $xr1, $xr2
+; CHECK-NEXT:    ret
+entry:
+  %res = call <32 x i8> @llvm.loongarch.lasx.xvshuf.b(<32 x i8> %va, <32 x i8> %vb, <32 x i8> %vc)
+  ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvshuf.h(<16 x i16>, <16 x i16>, <16 x i16>)
+
+define <16 x i16> @lasx_xvshuf_h(<16 x i16> %va, <16 x i16> %vb, <16 x i16> %vc) nounwind {
+; CHECK-LABEL: lasx_xvshuf_h:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvshuf.h $xr0, $xr1, $xr2
+; CHECK-NEXT:    ret
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvshuf.h(<16 x i16> %va, <16 x i16> %vb, <16 x i16> %vc)
+  ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvshuf.w(<8 x i32>, <8 x i32>, <8 x i32>)
+
+define <8 x i32> @lasx_xvshuf_w(<8 x i32> %va, <8 x i32> %vb, <8 x i32> %vc) nounwind {
+; CHECK-LABEL: lasx_xvshuf_w:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvshuf.w $xr0, $xr1, $xr2
+; CHECK-NEXT:    ret
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvshuf.w(<8 x i32> %va, <8 x i32> %vb, <8 x i32> %vc)
+  ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvshuf.d(<4 x i64>, <4 x i64>, <4 x i64>)
+
+define <4 x i64> @lasx_xvshuf_d(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc) nounwind {
+; CHECK-LABEL: lasx_xvshuf_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvshuf.d $xr0, $xr1, $xr2
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvshuf.d(<4 x i64> %va, <4 x i64> %vb, <4 x i64> %vc)
+  ret <4 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf4i-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf4i-invalid-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..9217d1f6a05da2c7a152bc34d55087585282e434
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf4i-invalid-imm.ll
@@ -0,0 +1,65 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s
+
+declare <32 x i8> @llvm.loongarch.lasx.xvshuf4i.b(<32 x i8>, i32)
+
+define <32 x i8> @lasx_xvshuf4i_b_lo(<32 x i8> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvshuf4i.b: argument out of range
+entry:
+  %res = call <32 x i8> @llvm.loongarch.lasx.xvshuf4i.b(<32 x i8> %va, i32 -1)
+  ret <32 x i8> %res
+}
+
+define <32 x i8> @lasx_xvshuf4i_b_hi(<32 x i8> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvshuf4i.b: argument out of range
+entry:
+  %res = call <32 x i8> @llvm.loongarch.lasx.xvshuf4i.b(<32 x i8> %va, i32 256)
+  ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvshuf4i.h(<16 x i16>, i32)
+
+define <16 x i16> @lasx_xvshuf4i_h_lo(<16 x i16> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvshuf4i.h: argument out of range
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvshuf4i.h(<16 x i16> %va, i32 -1)
+  ret <16 x i16> %res
+}
+
+define <16 x i16> @lasx_xvshuf4i_h_hi(<16 x i16> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvshuf4i.h: argument out of range
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvshuf4i.h(<16 x i16> %va, i32 256)
+  ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvshuf4i.w(<8 x i32>, i32)
+
+define <8 x i32> @lasx_xvshuf4i_w_lo(<8 x i32> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvshuf4i.w: argument out of range
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvshuf4i.w(<8 x i32> %va, i32 -1)
+  ret <8 x i32> %res
+}
+
+define <8 x i32> @lasx_xvshuf4i_w_hi(<8 x i32> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvshuf4i.w: argument out of range
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvshuf4i.w(<8 x i32> %va, i32 256)
+  ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvshuf4i.d(<4 x i64>, <4 x i64>, i32)
+
+define <4 x i64> @lasx_xvshuf4i_d_lo(<4 x i64> %va, <4 x i64> %vb) nounwind {
+; CHECK: llvm.loongarch.lasx.xvshuf4i.d: argument out of range
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvshuf4i.d(<4 x i64> %va, <4 x i64> %vb, i32 -1)
+  ret <4 x i64> %res
+}
+
+define <4 x i64> @lasx_xvshuf4i_d_hi(<4 x i64> %va, <4 x i64> %vb) nounwind {
+; CHECK: llvm.loongarch.lasx.xvshuf4i.d: argument out of range
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvshuf4i.d(<4 x i64> %va, <4 x i64> %vb, i32 256)
+  ret <4 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf4i-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf4i-non-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..8d6d1c69419380e9430586fd20d9434d6f5235d6
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf4i-non-imm.ll
@@ -0,0 +1,37 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s
+
+declare <32 x i8> @llvm.loongarch.lasx.xvshuf4i.b(<32 x i8>, i32)
+
+define <32 x i8> @lasx_xvshuf4i_b(<32 x i8> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <32 x i8> @llvm.loongarch.lasx.xvshuf4i.b(<32 x i8> %va, i32 %b)
+  ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvshuf4i.h(<16 x i16>, i32)
+
+define <16 x i16> @lasx_xvshuf4i_h(<16 x i16> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvshuf4i.h(<16 x i16> %va, i32 %b)
+  ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvshuf4i.w(<8 x i32>, i32)
+
+define <8 x i32> @lasx_xvshuf4i_w(<8 x i32> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvshuf4i.w(<8 x i32> %va, i32 %b)
+  ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvshuf4i.d(<4 x i64>, <4 x i64>, i32)
+
+define <4 x i64> @lasx_xvshuf4i_d(<4 x i64> %va, <4 x i64> %vb, i32 %c) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvshuf4i.d(<4 x i64> %va, <4 x i64> %vb, i32 %c)
+  ret <4 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf4i.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf4i.ll
new file mode 100644
index 0000000000000000000000000000000000000000..31205086759c404892bd829ed696a9ae5699b507
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-shuf4i.ll
@@ -0,0 +1,50 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+declare <32 x i8> @llvm.loongarch.lasx.xvshuf4i.b(<32 x i8>, i32)
+
+define <32 x i8> @lasx_xvshuf4i_b(<32 x i8> %va) nounwind {
+; CHECK-LABEL: lasx_xvshuf4i_b:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvshuf4i.b $xr0, $xr0, 1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <32 x i8> @llvm.loongarch.lasx.xvshuf4i.b(<32 x i8> %va, i32 1)
+  ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvshuf4i.h(<16 x i16>, i32)
+
+define <16 x i16> @lasx_xvshuf4i_h(<16 x i16> %va) nounwind {
+; CHECK-LABEL: lasx_xvshuf4i_h:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvshuf4i.h $xr0, $xr0, 1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvshuf4i.h(<16 x i16> %va, i32 1)
+  ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvshuf4i.w(<8 x i32>, i32)
+
+define <8 x i32> @lasx_xvshuf4i_w(<8 x i32> %va) nounwind {
+; CHECK-LABEL: lasx_xvshuf4i_w:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvshuf4i.w $xr0, $xr0, 1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvshuf4i.w(<8 x i32> %va, i32 1)
+  ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvshuf4i.d(<4 x i64>, <4 x i64>, i32)
+
+define <4 x i64> @lasx_xvshuf4i_d(<4 x i64> %va, <4 x i64> %vb) nounwind {
+; CHECK-LABEL: lasx_xvshuf4i_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvshuf4i.d $xr0, $xr1, 1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvshuf4i.d(<4 x i64> %va, <4 x i64> %vb, i32 1)
+  ret <4 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-signcov.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-signcov.ll
new file mode 100644
index 0000000000000000000000000000000000000000..e6c6d8ccd0d3507caaf304d01103e62aa4dc33fd
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-signcov.ll
@@ -0,0 +1,50 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+declare <32 x i8> @llvm.loongarch.lasx.xvsigncov.b(<32 x i8>, <32 x i8>)
+
+define <32 x i8> @lasx_xvsigncov_b(<32 x i8> %va, <32 x i8> %vb) nounwind {
+; CHECK-LABEL: lasx_xvsigncov_b:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvsigncov.b $xr0, $xr0, $xr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <32 x i8> @llvm.loongarch.lasx.xvsigncov.b(<32 x i8> %va, <32 x i8> %vb)
+  ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvsigncov.h(<16 x i16>, <16 x i16>)
+
+define <16 x i16> @lasx_xvsigncov_h(<16 x i16> %va, <16 x i16> %vb) nounwind {
+; CHECK-LABEL: lasx_xvsigncov_h:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvsigncov.h $xr0, $xr0, $xr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvsigncov.h(<16 x i16> %va, <16 x i16> %vb)
+  ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvsigncov.w(<8 x i32>, <8 x i32>)
+
+define <8 x i32> @lasx_xvsigncov_w(<8 x i32> %va, <8 x i32> %vb) nounwind {
+; CHECK-LABEL: lasx_xvsigncov_w:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvsigncov.w $xr0, $xr0, $xr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvsigncov.w(<8 x i32> %va, <8 x i32> %vb)
+  ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvsigncov.d(<4 x i64>, <4 x i64>)
+
+define <4 x i64> @lasx_xvsigncov_d(<4 x i64> %va, <4 x i64> %vb) nounwind {
+; CHECK-LABEL: lasx_xvsigncov_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvsigncov.d $xr0, $xr0, $xr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvsigncov.d(<4 x i64> %va, <4 x i64> %vb)
+  ret <4 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sle-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sle-invalid-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..5b10aca9801d67fba3ac1f5990baa52e2233c1d6
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sle-invalid-imm.ll
@@ -0,0 +1,129 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s
+
+declare <32 x i8> @llvm.loongarch.lasx.xvslei.b(<32 x i8>, i32)
+
+define <32 x i8> @lasx_xvslei_b_lo(<32 x i8> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvslei.b: argument out of range
+entry:
+  %res = call <32 x i8> @llvm.loongarch.lasx.xvslei.b(<32 x i8> %va, i32 -17)
+  ret <32 x i8> %res
+}
+
+define <32 x i8> @lasx_xvslei_b_hi(<32 x i8> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvslei.b: argument out of range
+entry:
+  %res = call <32 x i8> @llvm.loongarch.lasx.xvslei.b(<32 x i8> %va, i32 16)
+  ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvslei.h(<16 x i16>, i32)
+
+define <16 x i16> @lasx_xvslei_h_lo(<16 x i16> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvslei.h: argument out of range
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvslei.h(<16 x i16> %va, i32 -17)
+  ret <16 x i16> %res
+}
+
+define <16 x i16> @lasx_xvslei_h_hi(<16 x i16> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvslei.h: argument out of range
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvslei.h(<16 x i16> %va, i32 16)
+  ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvslei.w(<8 x i32>, i32)
+
+define <8 x i32> @lasx_xvslei_w_lo(<8 x i32> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvslei.w: argument out of range
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvslei.w(<8 x i32> %va, i32 -17)
+  ret <8 x i32> %res
+}
+
+define <8 x i32> @lasx_xvslei_w_hi(<8 x i32> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvslei.w: argument out of range
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvslei.w(<8 x i32> %va, i32 16)
+  ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvslei.d(<4 x i64>, i32)
+
+define <4 x i64> @lasx_xvslei_d_lo(<4 x i64> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvslei.d: argument out of range
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvslei.d(<4 x i64> %va, i32 -17)
+  ret <4 x i64> %res
+}
+
+define <4 x i64> @lasx_xvslei_d_hi(<4 x i64> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvslei.d: argument out of range
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvslei.d(<4 x i64> %va, i32 16)
+  ret <4 x i64> %res
+}
+
+declare <32 x i8> @llvm.loongarch.lasx.xvslei.bu(<32 x i8>, i32)
+
+define <32 x i8> @lasx_xvslei_bu_lo(<32 x i8> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvslei.bu: argument out of range
+entry:
+  %res = call <32 x i8> @llvm.loongarch.lasx.xvslei.bu(<32 x i8> %va, i32 -1)
+  ret <32 x i8> %res
+}
+
+define <32 x i8> @lasx_xvslei_bu_hi(<32 x i8> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvslei.bu: argument out of range
+entry:
+  %res = call <32 x i8> @llvm.loongarch.lasx.xvslei.bu(<32 x i8> %va, i32 32)
+  ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvslei.hu(<16 x i16>, i32)
+
+define <16 x i16> @lasx_xvslei_hu_lo(<16 x i16> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvslei.hu: argument out of range
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvslei.hu(<16 x i16> %va, i32 -1)
+  ret <16 x i16> %res
+}
+
+define <16 x i16> @lasx_xvslei_hu_hi(<16 x i16> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvslei.hu: argument out of range
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvslei.hu(<16 x i16> %va, i32 32)
+  ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvslei.wu(<8 x i32>, i32)
+
+define <8 x i32> @lasx_xvslei_wu_lo(<8 x i32> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvslei.wu: argument out of range
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvslei.wu(<8 x i32> %va, i32 -1)
+  ret <8 x i32> %res
+}
+
+define <8 x i32> @lasx_xvslei_wu_hi(<8 x i32> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvslei.wu: argument out of range
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvslei.wu(<8 x i32> %va, i32 32)
+  ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvslei.du(<4 x i64>, i32)
+
+define <4 x i64> @lasx_xvslei_du_lo(<4 x i64> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvslei.du: argument out of range
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvslei.du(<4 x i64> %va, i32 -1)
+  ret <4 x i64> %res
+}
+
+define <4 x i64> @lasx_xvslei_du_hi(<4 x i64> %va) nounwind {
+; CHECK: llvm.loongarch.lasx.xvslei.du: argument out of range
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvslei.du(<4 x i64> %va, i32 32)
+  ret <4 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sle-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sle-non-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..903bc10d88b789614b515165e656d745df7ecb1e
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sle-non-imm.ll
@@ -0,0 +1,73 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s
+
+declare <32 x i8> @llvm.loongarch.lasx.xvslei.b(<32 x i8>, i32)
+
+define <32 x i8> @lasx_xvslei_b(<32 x i8> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <32 x i8> @llvm.loongarch.lasx.xvslei.b(<32 x i8> %va, i32 %b)
+  ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvslei.h(<16 x i16>, i32)
+
+define <16 x i16> @lasx_xvslei_h(<16 x i16> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvslei.h(<16 x i16> %va, i32 %b)
+  ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvslei.w(<8 x i32>, i32)
+
+define <8 x i32> @lasx_xvslei_w(<8 x i32> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvslei.w(<8 x i32> %va, i32 %b)
+  ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvslei.d(<4 x i64>, i32)
+
+define <4 x i64> @lasx_xvslei_d(<4 x i64> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvslei.d(<4 x i64> %va, i32 %b)
+  ret <4 x i64> %res
+}
+
+declare <32 x i8> @llvm.loongarch.lasx.xvslei.bu(<32 x i8>, i32)
+
+define <32 x i8> @lasx_xvslei_bu(<32 x i8> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <32 x i8> @llvm.loongarch.lasx.xvslei.bu(<32 x i8> %va, i32 %b)
+  ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvslei.hu(<16 x i16>, i32)
+
+define <16 x i16> @lasx_xvslei_hu(<16 x i16> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvslei.hu(<16 x i16> %va, i32 %b)
+  ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvslei.wu(<8 x i32>, i32)
+
+define <8 x i32> @lasx_xvslei_wu(<8 x i32> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
%va, i32 %b) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvslei.du(<4 x i64>, i32) + +define <4 x i64> @lasx_xvslei_du(<4 x i64> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvslei.du(<4 x i64> %va, i32 %b) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sle.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sle.ll new file mode 100644 index 0000000000000000000000000000000000000000..8895efc84b845d9b0dc3c445b4650817c168138d --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sle.ll @@ -0,0 +1,194 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvsle.b(<32 x i8>, <32 x i8>) + +define <32 x i8> @lasx_xvsle_b(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvsle_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsle.b $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvsle.b(<32 x i8> %va, <32 x i8> %vb) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvsle.h(<16 x i16>, <16 x i16>) + +define <16 x i16> @lasx_xvsle_h(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvsle_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsle.h $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvsle.h(<16 x i16> %va, <16 x i16> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvsle.w(<8 x i32>, <8 x i32>) + +define <8 x i32> @lasx_xvsle_w(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvsle_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsle.w $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvsle.w(<8 x i32> %va, <8 x i32> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvsle.d(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvsle_d(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvsle_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsle.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvsle.d(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} + +declare <32 x i8> @llvm.loongarch.lasx.xvslei.b(<32 x i8>, i32) + +define <32 x i8> @lasx_xvslei_b(<32 x i8> %va) nounwind { +; CHECK-LABEL: lasx_xvslei_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvslei.b $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvslei.b(<32 x i8> %va, i32 1) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvslei.h(<16 x i16>, i32) + +define <16 x i16> @lasx_xvslei_h(<16 x i16> %va) nounwind { +; CHECK-LABEL: lasx_xvslei_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvslei.h $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvslei.h(<16 x i16> %va, i32 1) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvslei.w(<8 x i32>, i32) + +define <8 x i32> @lasx_xvslei_w(<8 x i32> %va) nounwind { +; CHECK-LABEL: lasx_xvslei_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvslei.w $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvslei.w(<8 x i32> %va, i32 1) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvslei.d(<4 x i64>, i32) + +define <4 x i64> @lasx_xvslei_d(<4 x i64> %va) nounwind { +; CHECK-LABEL: lasx_xvslei_d: +; 
CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvslei.d $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvslei.d(<4 x i64> %va, i32 1) + ret <4 x i64> %res +} + +declare <32 x i8> @llvm.loongarch.lasx.xvsle.bu(<32 x i8>, <32 x i8>) + +define <32 x i8> @lasx_xvsle_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvsle_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsle.bu $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvsle.bu(<32 x i8> %va, <32 x i8> %vb) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvsle.hu(<16 x i16>, <16 x i16>) + +define <16 x i16> @lasx_xvsle_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvsle_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsle.hu $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvsle.hu(<16 x i16> %va, <16 x i16> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvsle.wu(<8 x i32>, <8 x i32>) + +define <8 x i32> @lasx_xvsle_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvsle_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsle.wu $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvsle.wu(<8 x i32> %va, <8 x i32> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvsle.du(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvsle_du(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvsle_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsle.du $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvsle.du(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} + +declare <32 x i8> @llvm.loongarch.lasx.xvslei.bu(<32 x i8>, i32) + +define <32 x i8> @lasx_xvslei_bu(<32 x i8> %va) nounwind { +; CHECK-LABEL: lasx_xvslei_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvslei.bu $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvslei.bu(<32 x i8> %va, i32 1) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvslei.hu(<16 x i16>, i32) + +define <16 x i16> @lasx_xvslei_hu(<16 x i16> %va) nounwind { +; CHECK-LABEL: lasx_xvslei_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvslei.hu $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvslei.hu(<16 x i16> %va, i32 1) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvslei.wu(<8 x i32>, i32) + +define <8 x i32> @lasx_xvslei_wu(<8 x i32> %va) nounwind { +; CHECK-LABEL: lasx_xvslei_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvslei.wu $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvslei.wu(<8 x i32> %va, i32 1) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvslei.du(<4 x i64>, i32) + +define <4 x i64> @lasx_xvslei_du(<4 x i64> %va) nounwind { +; CHECK-LABEL: lasx_xvslei_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvslei.du $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvslei.du(<4 x i64> %va, i32 1) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sll-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sll-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..bf8205376a6c2e4837b6d63b8080c62a2df269a6 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sll-invalid-imm.ll @@ -0,0 +1,65 @@ +; RUN: not llc 
--mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvslli.b(<32 x i8>, i32) + +define <32 x i8> @lasx_xvslli_b_lo(<32 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvslli.b: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvslli.b(<32 x i8> %va, i32 -1) + ret <32 x i8> %res +} + +define <32 x i8> @lasx_xvslli_b_hi(<32 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvslli.b: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvslli.b(<32 x i8> %va, i32 8) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvslli.h(<16 x i16>, i32) + +define <16 x i16> @lasx_xvslli_h_lo(<16 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvslli.h: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvslli.h(<16 x i16> %va, i32 -1) + ret <16 x i16> %res +} + +define <16 x i16> @lasx_xvslli_h_hi(<16 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvslli.h: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvslli.h(<16 x i16> %va, i32 16) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvslli.w(<8 x i32>, i32) + +define <8 x i32> @lasx_xvslli_w_lo(<8 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvslli.w: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvslli.w(<8 x i32> %va, i32 -1) + ret <8 x i32> %res +} + +define <8 x i32> @lasx_xvslli_w_hi(<8 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvslli.w: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvslli.w(<8 x i32> %va, i32 32) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvslli.d(<4 x i64>, i32) + +define <4 x i64> @lasx_xvslli_d_lo(<4 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvslli.d: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvslli.d(<4 x i64> %va, i32 -1) + ret <4 x i64> %res +} + +define <4 x i64> @lasx_xvslli_d_hi(<4 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvslli.d: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvslli.d(<4 x i64> %va, i32 64) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sll-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sll-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..b5368a86b5c3bc06b5742251d7bd8f19d86e2a6c --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sll-non-imm.ll @@ -0,0 +1,37 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvslli.b(<32 x i8>, i32) + +define <32 x i8> @lasx_xvslli_b(<32 x i8> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvslli.b(<32 x i8> %va, i32 %b) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvslli.h(<16 x i16>, i32) + +define <16 x i16> @lasx_xvslli_h(<16 x i16> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvslli.h(<16 x i16> %va, i32 %b) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvslli.w(<8 x i32>, i32) + +define <8 x i32> @lasx_xvslli_w(<8 x i32> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvslli.w(<8 x i32> %va, i32 %b) + ret <8 x i32> %res +} + +declare <4 x i64> 
@llvm.loongarch.lasx.xvslli.d(<4 x i64>, i32) + +define <4 x i64> @lasx_xvslli_d(<4 x i64> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvslli.d(<4 x i64> %va, i32 %b) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sll.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sll.ll new file mode 100644 index 0000000000000000000000000000000000000000..14110b613dbe3027b82559bf6c1a64540e47dc06 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sll.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvsll.b(<32 x i8>, <32 x i8>) + +define <32 x i8> @lasx_xvsll_b(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvsll_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsll.b $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvsll.b(<32 x i8> %va, <32 x i8> %vb) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvsll.h(<16 x i16>, <16 x i16>) + +define <16 x i16> @lasx_xvsll_h(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvsll_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsll.h $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvsll.h(<16 x i16> %va, <16 x i16> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvsll.w(<8 x i32>, <8 x i32>) + +define <8 x i32> @lasx_xvsll_w(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvsll_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsll.w $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvsll.w(<8 x i32> %va, <8 x i32> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvsll.d(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvsll_d(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvsll_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsll.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvsll.d(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} + +declare <32 x i8> @llvm.loongarch.lasx.xvslli.b(<32 x i8>, i32) + +define <32 x i8> @lasx_xvslli_b(<32 x i8> %va) nounwind { +; CHECK-LABEL: lasx_xvslli_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvslli.b $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvslli.b(<32 x i8> %va, i32 1) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvslli.h(<16 x i16>, i32) + +define <16 x i16> @lasx_xvslli_h(<16 x i16> %va) nounwind { +; CHECK-LABEL: lasx_xvslli_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvslli.h $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvslli.h(<16 x i16> %va, i32 1) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvslli.w(<8 x i32>, i32) + +define <8 x i32> @lasx_xvslli_w(<8 x i32> %va) nounwind { +; CHECK-LABEL: lasx_xvslli_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvslli.w $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvslli.w(<8 x i32> %va, i32 1) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvslli.d(<4 x i64>, i32) + +define <4 x i64> @lasx_xvslli_d(<4 x i64> %va) nounwind { +; CHECK-LABEL: lasx_xvslli_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvslli.d $xr0, $xr0, 1 +; 
CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvslli.d(<4 x i64> %va, i32 1) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sllwil-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sllwil-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..18803767d6c01c6b407149b75aea53afdb08e108 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sllwil-invalid-imm.ll @@ -0,0 +1,97 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <16 x i16> @llvm.loongarch.lasx.xvsllwil.h.b(<32 x i8>, i32) + +define <16 x i16> @lasx_xvsllwil_h_b_lo(<32 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvsllwil.h.b: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvsllwil.h.b(<32 x i8> %va, i32 -1) + ret <16 x i16> %res +} + +define <16 x i16> @lasx_xvsllwil_h_b_hi(<32 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvsllwil.h.b: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvsllwil.h.b(<32 x i8> %va, i32 8) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvsllwil.w.h(<16 x i16>, i32) + +define <8 x i32> @lasx_xvsllwil_w_h_lo(<16 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvsllwil.w.h: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvsllwil.w.h(<16 x i16> %va, i32 -1) + ret <8 x i32> %res +} + +define <8 x i32> @lasx_xvsllwil_w_h_hi(<16 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvsllwil.w.h: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvsllwil.w.h(<16 x i16> %va, i32 16) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvsllwil.d.w(<8 x i32>, i32) + +define <4 x i64> @lasx_xvsllwil_d_w_lo(<8 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvsllwil.d.w: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvsllwil.d.w(<8 x i32> %va, i32 -1) + ret <4 x i64> %res +} + +define <4 x i64> @lasx_xvsllwil_d_w_hi(<8 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvsllwil.d.w: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvsllwil.d.w(<8 x i32> %va, i32 32) + ret <4 x i64> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvsllwil.hu.bu(<32 x i8>, i32) + +define <16 x i16> @lasx_xvsllwil_hu_bu_lo(<32 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvsllwil.hu.bu: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvsllwil.hu.bu(<32 x i8> %va, i32 -1) + ret <16 x i16> %res +} + +define <16 x i16> @lasx_xvsllwil_hu_bu_hi(<32 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvsllwil.hu.bu: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvsllwil.hu.bu(<32 x i8> %va, i32 8) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvsllwil.wu.hu(<16 x i16>, i32) + +define <8 x i32> @lasx_xvsllwil_wu_hu_lo(<16 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvsllwil.wu.hu: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvsllwil.wu.hu(<16 x i16> %va, i32 -1) + ret <8 x i32> %res +} + +define <8 x i32> @lasx_xvsllwil_wu_hu_hi(<16 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvsllwil.wu.hu: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvsllwil.wu.hu(<16 x i16> %va, i32 16) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvsllwil.du.wu(<8 x i32>, i32) + +define <4 x i64> @lasx_xvsllwil_du_wu_lo(<8 x 
i32> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvsllwil.du.wu: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvsllwil.du.wu(<8 x i32> %va, i32 -1) + ret <4 x i64> %res +} + +define <4 x i64> @lasx_xvsllwil_du_wu_hi(<8 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvsllwil.du.wu: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvsllwil.du.wu(<8 x i32> %va, i32 32) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sllwil-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sllwil-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..3f5d4d6316715216dee55f59693bae7dd9e36fbf --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sllwil-non-imm.ll @@ -0,0 +1,55 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <16 x i16> @llvm.loongarch.lasx.xvsllwil.h.b(<32 x i8>, i32) + +define <16 x i16> @lasx_xvsllwil_h_b(<32 x i8> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvsllwil.h.b(<32 x i8> %va, i32 %b) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvsllwil.w.h(<16 x i16>, i32) + +define <8 x i32> @lasx_xvsllwil_w_h(<16 x i16> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvsllwil.w.h(<16 x i16> %va, i32 %b) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvsllwil.d.w(<8 x i32>, i32) + +define <4 x i64> @lasx_xvsllwil_d_w(<8 x i32> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvsllwil.d.w(<8 x i32> %va, i32 %b) + ret <4 x i64> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvsllwil.hu.bu(<32 x i8>, i32) + +define <16 x i16> @lasx_xvsllwil_hu_bu(<32 x i8> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvsllwil.hu.bu(<32 x i8> %va, i32 %b) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvsllwil.wu.hu(<16 x i16>, i32) + +define <8 x i32> @lasx_xvsllwil_wu_hu(<16 x i16> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvsllwil.wu.hu(<16 x i16> %va, i32 %b) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvsllwil.du.wu(<8 x i32>, i32) + +define <4 x i64> @lasx_xvsllwil_du_wu(<8 x i32> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvsllwil.du.wu(<8 x i32> %va, i32 %b) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sllwil.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sllwil.ll new file mode 100644 index 0000000000000000000000000000000000000000..a72b8a6cbb4f4155894fc5dc7b6251acd1aae83b --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sllwil.ll @@ -0,0 +1,74 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <16 x i16> @llvm.loongarch.lasx.xvsllwil.h.b(<32 x i8>, i32) + +define <16 x i16> @lasx_xvsllwil_h_b(<32 x i8> %va) nounwind { +; CHECK-LABEL: lasx_xvsllwil_h_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsllwil.h.b $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> 
@llvm.loongarch.lasx.xvsllwil.h.b(<32 x i8> %va, i32 1) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvsllwil.w.h(<16 x i16>, i32) + +define <8 x i32> @lasx_xvsllwil_w_h(<16 x i16> %va) nounwind { +; CHECK-LABEL: lasx_xvsllwil_w_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsllwil.w.h $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvsllwil.w.h(<16 x i16> %va, i32 1) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvsllwil.d.w(<8 x i32>, i32) + +define <4 x i64> @lasx_xvsllwil_d_w(<8 x i32> %va) nounwind { +; CHECK-LABEL: lasx_xvsllwil_d_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsllwil.d.w $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvsllwil.d.w(<8 x i32> %va, i32 1) + ret <4 x i64> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvsllwil.hu.bu(<32 x i8>, i32) + +define <16 x i16> @lasx_xvsllwil_hu_bu(<32 x i8> %va) nounwind { +; CHECK-LABEL: lasx_xvsllwil_hu_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsllwil.hu.bu $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvsllwil.hu.bu(<32 x i8> %va, i32 1) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvsllwil.wu.hu(<16 x i16>, i32) + +define <8 x i32> @lasx_xvsllwil_wu_hu(<16 x i16> %va) nounwind { +; CHECK-LABEL: lasx_xvsllwil_wu_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsllwil.wu.hu $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvsllwil.wu.hu(<16 x i16> %va, i32 1) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvsllwil.du.wu(<8 x i32>, i32) + +define <4 x i64> @lasx_xvsllwil_du_wu(<8 x i32> %va) nounwind { +; CHECK-LABEL: lasx_xvsllwil_du_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsllwil.du.wu $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvsllwil.du.wu(<8 x i32> %va, i32 1) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-slt-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-slt-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..dc0567da4e47e476de8807f7080c93c0dcf03712 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-slt-invalid-imm.ll @@ -0,0 +1,129 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvslti.b(<32 x i8>, i32) + +define <32 x i8> @lasx_xvslti_b_lo(<32 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvslti.b: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvslti.b(<32 x i8> %va, i32 -17) + ret <32 x i8> %res +} + +define <32 x i8> @lasx_xvslti_b_hi(<32 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvslti.b: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvslti.b(<32 x i8> %va, i32 16) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvslti.h(<16 x i16>, i32) + +define <16 x i16> @lasx_xvslti_h_lo(<16 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvslti.h: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvslti.h(<16 x i16> %va, i32 -17) + ret <16 x i16> %res +} + +define <16 x i16> @lasx_xvslti_h_hi(<16 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvslti.h: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvslti.h(<16 x i16> %va, i32 16) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvslti.w(<8 x 
i32>, i32) + +define <8 x i32> @lasx_xvslti_w_lo(<8 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvslti.w: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvslti.w(<8 x i32> %va, i32 -17) + ret <8 x i32> %res +} + +define <8 x i32> @lasx_xvslti_w_hi(<8 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvslti.w: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvslti.w(<8 x i32> %va, i32 16) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvslti.d(<4 x i64>, i32) + +define <4 x i64> @lasx_xvslti_d_lo(<4 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvslti.d: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvslti.d(<4 x i64> %va, i32 -17) + ret <4 x i64> %res +} + +define <4 x i64> @lasx_xvslti_d_hi(<4 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvslti.d: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvslti.d(<4 x i64> %va, i32 16) + ret <4 x i64> %res +} + +declare <32 x i8> @llvm.loongarch.lasx.xvslti.bu(<32 x i8>, i32) + +define <32 x i8> @lasx_xvslti_bu_lo(<32 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvslti.bu: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvslti.bu(<32 x i8> %va, i32 -1) + ret <32 x i8> %res +} + +define <32 x i8> @lasx_xvslti_bu_hi(<32 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvslti.bu: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvslti.bu(<32 x i8> %va, i32 32) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvslti.hu(<16 x i16>, i32) + +define <16 x i16> @lasx_xvslti_hu_lo(<16 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvslti.hu: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvslti.hu(<16 x i16> %va, i32 -1) + ret <16 x i16> %res +} + +define <16 x i16> @lasx_xvslti_hu_hi(<16 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvslti.hu: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvslti.hu(<16 x i16> %va, i32 32) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvslti.wu(<8 x i32>, i32) + +define <8 x i32> @lasx_xvslti_wu_lo(<8 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvslti.wu: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvslti.wu(<8 x i32> %va, i32 -1) + ret <8 x i32> %res +} + +define <8 x i32> @lasx_xvslti_wu_hi(<8 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvslti.wu: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvslti.wu(<8 x i32> %va, i32 32) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvslti.du(<4 x i64>, i32) + +define <4 x i64> @lasx_xvslti_du_lo(<4 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvslti.du: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvslti.du(<4 x i64> %va, i32 -1) + ret <4 x i64> %res +} + +define <4 x i64> @lasx_xvslti_du_hi(<4 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvslti.du: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvslti.du(<4 x i64> %va, i32 32) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-slt-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-slt-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..a2cedc8d3ef34cff77b1790c620abca473746bac --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-slt-non-imm.ll @@ -0,0 +1,73 @@ +; RUN: 
not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvslti.b(<32 x i8>, i32) + +define <32 x i8> @lasx_xvslti_b(<32 x i8> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvslti.b(<32 x i8> %va, i32 %b) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvslti.h(<16 x i16>, i32) + +define <16 x i16> @lasx_xvslti_h(<16 x i16> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvslti.h(<16 x i16> %va, i32 %b) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvslti.w(<8 x i32>, i32) + +define <8 x i32> @lasx_xvslti_w(<8 x i32> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvslti.w(<8 x i32> %va, i32 %b) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvslti.d(<4 x i64>, i32) + +define <4 x i64> @lasx_xvslti_d(<4 x i64> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvslti.d(<4 x i64> %va, i32 %b) + ret <4 x i64> %res +} + +declare <32 x i8> @llvm.loongarch.lasx.xvslti.bu(<32 x i8>, i32) + +define <32 x i8> @lasx_xvslti_bu(<32 x i8> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvslti.bu(<32 x i8> %va, i32 %b) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvslti.hu(<16 x i16>, i32) + +define <16 x i16> @lasx_xvslti_hu(<16 x i16> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvslti.hu(<16 x i16> %va, i32 %b) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvslti.wu(<8 x i32>, i32) + +define <8 x i32> @lasx_xvslti_wu(<8 x i32> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvslti.wu(<8 x i32> %va, i32 %b) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvslti.du(<4 x i64>, i32) + +define <4 x i64> @lasx_xvslti_du(<4 x i64> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvslti.du(<4 x i64> %va, i32 %b) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-slt.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-slt.ll new file mode 100644 index 0000000000000000000000000000000000000000..3ea87adff110a264ab0a44dc771ff215a17dbe52 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-slt.ll @@ -0,0 +1,194 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvslt.b(<32 x i8>, <32 x i8>) + +define <32 x i8> @lasx_xvslt_b(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvslt_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvslt.b $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvslt.b(<32 x i8> %va, <32 x i8> %vb) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvslt.h(<16 x i16>, <16 x i16>) + +define <16 x i16> @lasx_xvslt_h(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvslt_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvslt.h 
$xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvslt.h(<16 x i16> %va, <16 x i16> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvslt.w(<8 x i32>, <8 x i32>) + +define <8 x i32> @lasx_xvslt_w(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvslt_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvslt.w $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvslt.w(<8 x i32> %va, <8 x i32> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvslt.d(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvslt_d(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvslt_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvslt.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvslt.d(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} + +declare <32 x i8> @llvm.loongarch.lasx.xvslti.b(<32 x i8>, i32) + +define <32 x i8> @lasx_xvslti_b(<32 x i8> %va) nounwind { +; CHECK-LABEL: lasx_xvslti_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvslti.b $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvslti.b(<32 x i8> %va, i32 1) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvslti.h(<16 x i16>, i32) + +define <16 x i16> @lasx_xvslti_h(<16 x i16> %va) nounwind { +; CHECK-LABEL: lasx_xvslti_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvslti.h $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvslti.h(<16 x i16> %va, i32 1) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvslti.w(<8 x i32>, i32) + +define <8 x i32> @lasx_xvslti_w(<8 x i32> %va) nounwind { +; CHECK-LABEL: lasx_xvslti_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvslti.w $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvslti.w(<8 x i32> %va, i32 1) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvslti.d(<4 x i64>, i32) + +define <4 x i64> @lasx_xvslti_d(<4 x i64> %va) nounwind { +; CHECK-LABEL: lasx_xvslti_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvslti.d $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvslti.d(<4 x i64> %va, i32 1) + ret <4 x i64> %res +} + +declare <32 x i8> @llvm.loongarch.lasx.xvslt.bu(<32 x i8>, <32 x i8>) + +define <32 x i8> @lasx_xvslt_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvslt_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvslt.bu $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvslt.bu(<32 x i8> %va, <32 x i8> %vb) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvslt.hu(<16 x i16>, <16 x i16>) + +define <16 x i16> @lasx_xvslt_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvslt_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvslt.hu $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvslt.hu(<16 x i16> %va, <16 x i16> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvslt.wu(<8 x i32>, <8 x i32>) + +define <8 x i32> @lasx_xvslt_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvslt_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvslt.wu $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvslt.wu(<8 x i32> %va, <8 x i32> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvslt.du(<4 x i64>, <4 
x i64>) + +define <4 x i64> @lasx_xvslt_du(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvslt_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvslt.du $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvslt.du(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} + +declare <32 x i8> @llvm.loongarch.lasx.xvslti.bu(<32 x i8>, i32) + +define <32 x i8> @lasx_xvslti_bu(<32 x i8> %va) nounwind { +; CHECK-LABEL: lasx_xvslti_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvslti.bu $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvslti.bu(<32 x i8> %va, i32 1) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvslti.hu(<16 x i16>, i32) + +define <16 x i16> @lasx_xvslti_hu(<16 x i16> %va) nounwind { +; CHECK-LABEL: lasx_xvslti_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvslti.hu $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvslti.hu(<16 x i16> %va, i32 1) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvslti.wu(<8 x i32>, i32) + +define <8 x i32> @lasx_xvslti_wu(<8 x i32> %va) nounwind { +; CHECK-LABEL: lasx_xvslti_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvslti.wu $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvslti.wu(<8 x i32> %va, i32 1) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvslti.du(<4 x i64>, i32) + +define <4 x i64> @lasx_xvslti_du(<4 x i64> %va) nounwind { +; CHECK-LABEL: lasx_xvslti_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvslti.du $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvslti.du(<4 x i64> %va, i32 1) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sra-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sra-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..15b522d5e7e3ae91a56cd73619468d3cb3deed5c --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sra-invalid-imm.ll @@ -0,0 +1,65 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvsrai.b(<32 x i8>, i32) + +define <32 x i8> @lasx_xvsrai_b_lo(<32 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvsrai.b: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvsrai.b(<32 x i8> %va, i32 -1) + ret <32 x i8> %res +} + +define <32 x i8> @lasx_xvsrai_b_hi(<32 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvsrai.b: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvsrai.b(<32 x i8> %va, i32 8) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvsrai.h(<16 x i16>, i32) + +define <16 x i16> @lasx_xvsrai_h_lo(<16 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvsrai.h: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvsrai.h(<16 x i16> %va, i32 -1) + ret <16 x i16> %res +} + +define <16 x i16> @lasx_xvsrai_h_hi(<16 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvsrai.h: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvsrai.h(<16 x i16> %va, i32 16) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvsrai.w(<8 x i32>, i32) + +define <8 x i32> @lasx_xvsrai_w_lo(<8 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvsrai.w: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvsrai.w(<8 x i32> %va, i32 -1) + ret <8 x 
i32> %res +} + +define <8 x i32> @lasx_xvsrai_w_hi(<8 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvsrai.w: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvsrai.w(<8 x i32> %va, i32 32) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvsrai.d(<4 x i64>, i32) + +define <4 x i64> @lasx_xvsrai_d_lo(<4 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvsrai.d: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvsrai.d(<4 x i64> %va, i32 -1) + ret <4 x i64> %res +} + +define <4 x i64> @lasx_xvsrai_d_hi(<4 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvsrai.d: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvsrai.d(<4 x i64> %va, i32 64) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sra-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sra-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..fefee7246ae6db2585799dc90d90a4dc6cfe0884 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sra-non-imm.ll @@ -0,0 +1,37 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvsrai.b(<32 x i8>, i32) + +define <32 x i8> @lasx_xvsrai_b(<32 x i8> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvsrai.b(<32 x i8> %va, i32 %b) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvsrai.h(<16 x i16>, i32) + +define <16 x i16> @lasx_xvsrai_h(<16 x i16> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvsrai.h(<16 x i16> %va, i32 %b) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvsrai.w(<8 x i32>, i32) + +define <8 x i32> @lasx_xvsrai_w(<8 x i32> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvsrai.w(<8 x i32> %va, i32 %b) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvsrai.d(<4 x i64>, i32) + +define <4 x i64> @lasx_xvsrai_d(<4 x i64> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvsrai.d(<4 x i64> %va, i32 %b) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sra.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sra.ll new file mode 100644 index 0000000000000000000000000000000000000000..a7498682559bd362e9098991801e813789e44f43 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sra.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvsra.b(<32 x i8>, <32 x i8>) + +define <32 x i8> @lasx_xvsra_b(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvsra_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsra.b $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvsra.b(<32 x i8> %va, <32 x i8> %vb) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvsra.h(<16 x i16>, <16 x i16>) + +define <16 x i16> @lasx_xvsra_h(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvsra_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsra.h $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> 
@llvm.loongarch.lasx.xvsra.h(<16 x i16> %va, <16 x i16> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvsra.w(<8 x i32>, <8 x i32>) + +define <8 x i32> @lasx_xvsra_w(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvsra_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsra.w $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvsra.w(<8 x i32> %va, <8 x i32> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvsra.d(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvsra_d(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvsra_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsra.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvsra.d(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} + +declare <32 x i8> @llvm.loongarch.lasx.xvsrai.b(<32 x i8>, i32) + +define <32 x i8> @lasx_xvsrai_b(<32 x i8> %va) nounwind { +; CHECK-LABEL: lasx_xvsrai_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsrai.b $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvsrai.b(<32 x i8> %va, i32 1) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvsrai.h(<16 x i16>, i32) + +define <16 x i16> @lasx_xvsrai_h(<16 x i16> %va) nounwind { +; CHECK-LABEL: lasx_xvsrai_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsrai.h $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvsrai.h(<16 x i16> %va, i32 1) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvsrai.w(<8 x i32>, i32) + +define <8 x i32> @lasx_xvsrai_w(<8 x i32> %va) nounwind { +; CHECK-LABEL: lasx_xvsrai_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsrai.w $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvsrai.w(<8 x i32> %va, i32 1) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvsrai.d(<4 x i64>, i32) + +define <4 x i64> @lasx_xvsrai_d(<4 x i64> %va) nounwind { +; CHECK-LABEL: lasx_xvsrai_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsrai.d $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvsrai.d(<4 x i64> %va, i32 1) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sran.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sran.ll new file mode 100644 index 0000000000000000000000000000000000000000..f59ae4c196621ffe7387099af08df45c0ed72fdc --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sran.ll @@ -0,0 +1,38 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvsran.b.h(<16 x i16>, <16 x i16>) + +define <32 x i8> @lasx_xvsran_b_h(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvsran_b_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsran.b.h $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvsran.b.h(<16 x i16> %va, <16 x i16> %vb) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvsran.h.w(<8 x i32>, <8 x i32>) + +define <16 x i16> @lasx_xvsran_h_w(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvsran_h_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsran.h.w $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvsran.h.w(<8 x i32> %va, <8 x i32> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> 
@llvm.loongarch.lasx.xvsran.w.d(<4 x i64>, <4 x i64>) + +define <8 x i32> @lasx_xvsran_w_d(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvsran_w_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsran.w.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvsran.w.d(<4 x i64> %va, <4 x i64> %vb) + ret <8 x i32> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srani-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srani-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..bedbfc4889d202df9484280cfb74f1cf9c0bd899 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srani-invalid-imm.ll @@ -0,0 +1,65 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvsrani.b.h(<32 x i8>, <32 x i8>, i32) + +define <32 x i8> @lasx_xvsrani_b_h_lo(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvsrani.b.h: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvsrani.b.h(<32 x i8> %va, <32 x i8> %vb, i32 -1) + ret <32 x i8> %res +} + +define <32 x i8> @lasx_xvsrani_b_h_hi(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvsrani.b.h: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvsrani.b.h(<32 x i8> %va, <32 x i8> %vb, i32 16) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvsrani.h.w(<16 x i16>, <16 x i16>, i32) + +define <16 x i16> @lasx_xvsrani_h_w_lo(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvsrani.h.w: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvsrani.h.w(<16 x i16> %va, <16 x i16> %vb, i32 -1) + ret <16 x i16> %res +} + +define <16 x i16> @lasx_xvsrani_h_w_hi(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvsrani.h.w: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvsrani.h.w(<16 x i16> %va, <16 x i16> %vb, i32 32) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvsrani.w.d(<8 x i32>, <8 x i32>, i32) + +define <8 x i32> @lasx_xvsrani_w_d_lo(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvsrani.w.d: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvsrani.w.d(<8 x i32> %va, <8 x i32> %vb, i32 -1) + ret <8 x i32> %res +} + +define <8 x i32> @lasx_xvsrani_w_d_hi(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvsrani.w.d: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvsrani.w.d(<8 x i32> %va, <8 x i32> %vb, i32 64) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvsrani.d.q(<4 x i64>, <4 x i64>, i32) + +define <4 x i64> @lasx_xvsrani_d_q_lo(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvsrani.d.q: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvsrani.d.q(<4 x i64> %va, <4 x i64> %vb, i32 -1) + ret <4 x i64> %res +} + +define <4 x i64> @lasx_xvsrani_d_q_hi(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvsrani.d.q: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvsrani.d.q(<4 x i64> %va, <4 x i64> %vb, i32 128) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srani-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srani-non-imm.ll new file mode 100644 index 
0000000000000000000000000000000000000000..3c17f2b6090a9bd2af5eb0c3859a2579c1eb8804 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srani-non-imm.ll @@ -0,0 +1,37 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvsrani.b.h(<32 x i8>, <32 x i8>, i32) + +define <32 x i8> @lasx_xvsrani_b_h(<32 x i8> %va, <32 x i8> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvsrani.b.h(<32 x i8> %va, <32 x i8> %vb, i32 %c) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvsrani.h.w(<16 x i16>, <16 x i16>, i32) + +define <16 x i16> @lasx_xvsrani_h_w(<16 x i16> %va, <16 x i16> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvsrani.h.w(<16 x i16> %va, <16 x i16> %vb, i32 %c) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvsrani.w.d(<8 x i32>, <8 x i32>, i32) + +define <8 x i32> @lasx_xvsrani_w_d(<8 x i32> %va, <8 x i32> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvsrani.w.d(<8 x i32> %va, <8 x i32> %vb, i32 %c) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvsrani.d.q(<4 x i64>, <4 x i64>, i32) + +define <4 x i64> @lasx_xvsrani_d_q(<4 x i64> %va, <4 x i64> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvsrani.d.q(<4 x i64> %va, <4 x i64> %vb, i32 %c) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srani.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srani.ll new file mode 100644 index 0000000000000000000000000000000000000000..91fb90da9c525bb0d43c964976895f52a95e6438 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srani.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvsrani.b.h(<32 x i8>, <32 x i8>, i32) + +define <32 x i8> @lasx_xvsrani_b_h(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvsrani_b_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsrani.b.h $xr0, $xr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvsrani.b.h(<32 x i8> %va, <32 x i8> %vb, i32 1) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvsrani.h.w(<16 x i16>, <16 x i16>, i32) + +define <16 x i16> @lasx_xvsrani_h_w(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvsrani_h_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsrani.h.w $xr0, $xr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvsrani.h.w(<16 x i16> %va, <16 x i16> %vb, i32 1) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvsrani.w.d(<8 x i32>, <8 x i32>, i32) + +define <8 x i32> @lasx_xvsrani_w_d(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvsrani_w_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsrani.w.d $xr0, $xr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvsrani.w.d(<8 x i32> %va, <8 x i32> %vb, i32 1) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvsrani.d.q(<4 x i64>, <4 x i64>, i32) + +define <4 x i64> @lasx_xvsrani_d_q(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvsrani_d_q: +; 
CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsrani.d.q $xr0, $xr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvsrani.d.q(<4 x i64> %va, <4 x i64> %vb, i32 1) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srar-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srar-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..e417e3cc5bbfef497d4223fb8ea74ee24ae7a451 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srar-invalid-imm.ll @@ -0,0 +1,65 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvsrari.b(<32 x i8>, i32) + +define <32 x i8> @lasx_xvsrari_b_lo(<32 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvsrari.b: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvsrari.b(<32 x i8> %va, i32 -1) + ret <32 x i8> %res +} + +define <32 x i8> @lasx_xvsrari_b_hi(<32 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvsrari.b: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvsrari.b(<32 x i8> %va, i32 8) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvsrari.h(<16 x i16>, i32) + +define <16 x i16> @lasx_xvsrari_h_lo(<16 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvsrari.h: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvsrari.h(<16 x i16> %va, i32 -1) + ret <16 x i16> %res +} + +define <16 x i16> @lasx_xvsrari_h_hi(<16 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvsrari.h: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvsrari.h(<16 x i16> %va, i32 16) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvsrari.w(<8 x i32>, i32) + +define <8 x i32> @lasx_xvsrari_w_lo(<8 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvsrari.w: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvsrari.w(<8 x i32> %va, i32 -1) + ret <8 x i32> %res +} + +define <8 x i32> @lasx_xvsrari_w_hi(<8 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvsrari.w: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvsrari.w(<8 x i32> %va, i32 32) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvsrari.d(<4 x i64>, i32) + +define <4 x i64> @lasx_xvsrari_d_lo(<4 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvsrari.d: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvsrari.d(<4 x i64> %va, i32 -1) + ret <4 x i64> %res +} + +define <4 x i64> @lasx_xvsrari_d_hi(<4 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvsrari.d: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvsrari.d(<4 x i64> %va, i32 64) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srar-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srar-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..15fed7966f1c22f11f529184b108bc006269c148 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srar-non-imm.ll @@ -0,0 +1,37 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvsrari.b(<32 x i8>, i32) + +define <32 x i8> @lasx_xvsrari_b(<32 x i8> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvsrari.b(<32 x i8> %va, i32 %b) + ret <32 x i8> %res +} + +declare 
<16 x i16> @llvm.loongarch.lasx.xvsrari.h(<16 x i16>, i32) + +define <16 x i16> @lasx_xvsrari_h(<16 x i16> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvsrari.h(<16 x i16> %va, i32 %b) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvsrari.w(<8 x i32>, i32) + +define <8 x i32> @lasx_xvsrari_w(<8 x i32> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvsrari.w(<8 x i32> %va, i32 %b) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvsrari.d(<4 x i64>, i32) + +define <4 x i64> @lasx_xvsrari_d(<4 x i64> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvsrari.d(<4 x i64> %va, i32 %b) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srar.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srar.ll new file mode 100644 index 0000000000000000000000000000000000000000..e2c160557c4dc05d0eae7d6ba23f83c88ac57c04 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srar.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvsrar.b(<32 x i8>, <32 x i8>) + +define <32 x i8> @lasx_xvsrar_b(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvsrar_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsrar.b $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvsrar.b(<32 x i8> %va, <32 x i8> %vb) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvsrar.h(<16 x i16>, <16 x i16>) + +define <16 x i16> @lasx_xvsrar_h(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvsrar_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsrar.h $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvsrar.h(<16 x i16> %va, <16 x i16> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvsrar.w(<8 x i32>, <8 x i32>) + +define <8 x i32> @lasx_xvsrar_w(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvsrar_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsrar.w $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvsrar.w(<8 x i32> %va, <8 x i32> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvsrar.d(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvsrar_d(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvsrar_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsrar.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvsrar.d(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} + +declare <32 x i8> @llvm.loongarch.lasx.xvsrari.b(<32 x i8>, i32) + +define <32 x i8> @lasx_xvsrari_b(<32 x i8> %va) nounwind { +; CHECK-LABEL: lasx_xvsrari_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsrari.b $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvsrari.b(<32 x i8> %va, i32 1) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvsrari.h(<16 x i16>, i32) + +define <16 x i16> @lasx_xvsrari_h(<16 x i16> %va) nounwind { +; CHECK-LABEL: lasx_xvsrari_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsrari.h $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> 
@llvm.loongarch.lasx.xvsrari.h(<16 x i16> %va, i32 1) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvsrari.w(<8 x i32>, i32) + +define <8 x i32> @lasx_xvsrari_w(<8 x i32> %va) nounwind { +; CHECK-LABEL: lasx_xvsrari_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsrari.w $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvsrari.w(<8 x i32> %va, i32 1) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvsrari.d(<4 x i64>, i32) + +define <4 x i64> @lasx_xvsrari_d(<4 x i64> %va) nounwind { +; CHECK-LABEL: lasx_xvsrari_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsrari.d $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvsrari.d(<4 x i64> %va, i32 1) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarn.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarn.ll new file mode 100644 index 0000000000000000000000000000000000000000..02dd989773ca11ca7c595b3da4f3033a78caf930 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarn.ll @@ -0,0 +1,38 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvsrarn.b.h(<16 x i16>, <16 x i16>) + +define <32 x i8> @lasx_xvsrarn_b_h(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvsrarn_b_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsrarn.b.h $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvsrarn.b.h(<16 x i16> %va, <16 x i16> %vb) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvsrarn.h.w(<8 x i32>, <8 x i32>) + +define <16 x i16> @lasx_xvsrarn_h_w(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvsrarn_h_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsrarn.h.w $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvsrarn.h.w(<8 x i32> %va, <8 x i32> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvsrarn.w.d(<4 x i64>, <4 x i64>) + +define <8 x i32> @lasx_xvsrarn_w_d(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvsrarn_w_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsrarn.w.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvsrarn.w.d(<4 x i64> %va, <4 x i64> %vb) + ret <8 x i32> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarni-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarni-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..83e977827e2d0eb5d707d7f0f0f437dc89bf7414 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarni-invalid-imm.ll @@ -0,0 +1,65 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvsrarni.b.h(<32 x i8>, <32 x i8>, i32) + +define <32 x i8> @lasx_xvsrarni_b_h_lo(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvsrarni.b.h: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvsrarni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 -1) + ret <32 x i8> %res +} + +define <32 x i8> @lasx_xvsrarni_b_h_hi(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvsrarni.b.h: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvsrarni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 16) + ret <32 x i8> %res +} + +declare <16 x 
i16> @llvm.loongarch.lasx.xvsrarni.h.w(<16 x i16>, <16 x i16>, i32) + +define <16 x i16> @lasx_xvsrarni_h_w_lo(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvsrarni.h.w: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvsrarni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 -1) + ret <16 x i16> %res +} + +define <16 x i16> @lasx_xvsrarni_h_w_hi(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvsrarni.h.w: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvsrarni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 32) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvsrarni.w.d(<8 x i32>, <8 x i32>, i32) + +define <8 x i32> @lasx_xvsrarni_w_d_lo(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvsrarni.w.d: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvsrarni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 -1) + ret <8 x i32> %res +} + +define <8 x i32> @lasx_xvsrarni_w_d_hi(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvsrarni.w.d: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvsrarni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 64) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvsrarni.d.q(<4 x i64>, <4 x i64>, i32) + +define <4 x i64> @lasx_xvsrarni_d_q_lo(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvsrarni.d.q: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvsrarni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 -1) + ret <4 x i64> %res +} + +define <4 x i64> @lasx_xvsrarni_d_q_hi(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvsrarni.d.q: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvsrarni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 128) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarni-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarni-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..eb577a29fb33b5dc60af02e9d08d6ab4eac07b91 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarni-non-imm.ll @@ -0,0 +1,37 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvsrarni.b.h(<32 x i8>, <32 x i8>, i32) + +define <32 x i8> @lasx_xvsrarni_b_h(<32 x i8> %va, <32 x i8> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvsrarni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 %c) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvsrarni.h.w(<16 x i16>, <16 x i16>, i32) + +define <16 x i16> @lasx_xvsrarni_h_w(<16 x i16> %va, <16 x i16> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvsrarni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 %c) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvsrarni.w.d(<8 x i32>, <8 x i32>, i32) + +define <8 x i32> @lasx_xvsrarni_w_d(<8 x i32> %va, <8 x i32> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvsrarni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 %c) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvsrarni.d.q(<4 x i64>, <4 x i64>, i32) + +define <4 x i64> @lasx_xvsrarni_d_q(<4 x i64> %va, <4 x i64> %vb, i32 %c) 
nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvsrarni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 %c) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarni.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarni.ll new file mode 100644 index 0000000000000000000000000000000000000000..a7d2c37397936c0d72524d0d929f98eb4c8fb034 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srarni.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvsrarni.b.h(<32 x i8>, <32 x i8>, i32) + +define <32 x i8> @lasx_xvsrarni_b_h(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvsrarni_b_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsrarni.b.h $xr0, $xr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvsrarni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 1) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvsrarni.h.w(<16 x i16>, <16 x i16>, i32) + +define <16 x i16> @lasx_xvsrarni_h_w(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvsrarni_h_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsrarni.h.w $xr0, $xr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvsrarni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 1) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvsrarni.w.d(<8 x i32>, <8 x i32>, i32) + +define <8 x i32> @lasx_xvsrarni_w_d(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvsrarni_w_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsrarni.w.d $xr0, $xr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvsrarni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 1) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvsrarni.d.q(<4 x i64>, <4 x i64>, i32) + +define <4 x i64> @lasx_xvsrarni_d_q(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvsrarni_d_q: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsrarni.d.q $xr0, $xr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvsrarni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 1) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srl-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srl-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..3ab02dcb97edd8eb0077a556243d6ae190494308 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srl-invalid-imm.ll @@ -0,0 +1,65 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvsrli.b(<32 x i8>, i32) + +define <32 x i8> @lasx_xvsrli_b_lo(<32 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvsrli.b: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvsrli.b(<32 x i8> %va, i32 -1) + ret <32 x i8> %res +} + +define <32 x i8> @lasx_xvsrli_b_hi(<32 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvsrli.b: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvsrli.b(<32 x i8> %va, i32 8) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvsrli.h(<16 x i16>, i32) + +define <16 x i16> @lasx_xvsrli_h_lo(<16 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvsrli.h: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvsrli.h(<16 x i16> 
%va, i32 -1) + ret <16 x i16> %res +} + +define <16 x i16> @lasx_xvsrli_h_hi(<16 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvsrli.h: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvsrli.h(<16 x i16> %va, i32 16) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvsrli.w(<8 x i32>, i32) + +define <8 x i32> @lasx_xvsrli_w_lo(<8 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvsrli.w: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvsrli.w(<8 x i32> %va, i32 -1) + ret <8 x i32> %res +} + +define <8 x i32> @lasx_xvsrli_w_hi(<8 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvsrli.w: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvsrli.w(<8 x i32> %va, i32 32) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvsrli.d(<4 x i64>, i32) + +define <4 x i64> @lasx_xvsrli_d_lo(<4 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvsrli.d: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvsrli.d(<4 x i64> %va, i32 -1) + ret <4 x i64> %res +} + +define <4 x i64> @lasx_xvsrli_d_hi(<4 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvsrli.d: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvsrli.d(<4 x i64> %va, i32 64) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srl-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srl-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..bc085aeaa232a06c813fdf3082db505c2c653f74 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srl-non-imm.ll @@ -0,0 +1,37 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvsrli.b(<32 x i8>, i32) + +define <32 x i8> @lasx_xvsrli_b(<32 x i8> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvsrli.b(<32 x i8> %va, i32 %b) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvsrli.h(<16 x i16>, i32) + +define <16 x i16> @lasx_xvsrli_h(<16 x i16> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvsrli.h(<16 x i16> %va, i32 %b) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvsrli.w(<8 x i32>, i32) + +define <8 x i32> @lasx_xvsrli_w(<8 x i32> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvsrli.w(<8 x i32> %va, i32 %b) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvsrli.d(<4 x i64>, i32) + +define <4 x i64> @lasx_xvsrli_d(<4 x i64> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvsrli.d(<4 x i64> %va, i32 %b) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srl.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srl.ll new file mode 100644 index 0000000000000000000000000000000000000000..7b2992f2ca3bca6e3f52265f5ad7935c26559b1e --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srl.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvsrl.b(<32 x i8>, <32 x i8>) + +define <32 x i8> @lasx_xvsrl_b(<32 x i8> %va, <32 x i8> 
%vb) nounwind { +; CHECK-LABEL: lasx_xvsrl_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsrl.b $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvsrl.b(<32 x i8> %va, <32 x i8> %vb) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvsrl.h(<16 x i16>, <16 x i16>) + +define <16 x i16> @lasx_xvsrl_h(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvsrl_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsrl.h $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvsrl.h(<16 x i16> %va, <16 x i16> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvsrl.w(<8 x i32>, <8 x i32>) + +define <8 x i32> @lasx_xvsrl_w(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvsrl_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsrl.w $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvsrl.w(<8 x i32> %va, <8 x i32> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvsrl.d(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvsrl_d(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvsrl_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsrl.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvsrl.d(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} + +declare <32 x i8> @llvm.loongarch.lasx.xvsrli.b(<32 x i8>, i32) + +define <32 x i8> @lasx_xvsrli_b(<32 x i8> %va) nounwind { +; CHECK-LABEL: lasx_xvsrli_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsrli.b $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvsrli.b(<32 x i8> %va, i32 1) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvsrli.h(<16 x i16>, i32) + +define <16 x i16> @lasx_xvsrli_h(<16 x i16> %va) nounwind { +; CHECK-LABEL: lasx_xvsrli_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsrli.h $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvsrli.h(<16 x i16> %va, i32 1) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvsrli.w(<8 x i32>, i32) + +define <8 x i32> @lasx_xvsrli_w(<8 x i32> %va) nounwind { +; CHECK-LABEL: lasx_xvsrli_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsrli.w $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvsrli.w(<8 x i32> %va, i32 1) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvsrli.d(<4 x i64>, i32) + +define <4 x i64> @lasx_xvsrli_d(<4 x i64> %va) nounwind { +; CHECK-LABEL: lasx_xvsrli_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsrli.d $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvsrli.d(<4 x i64> %va, i32 1) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srln.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srln.ll new file mode 100644 index 0000000000000000000000000000000000000000..dc5c0e016ea0a7e83f07115093d4f8447355b779 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srln.ll @@ -0,0 +1,38 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvsrln.b.h(<16 x i16>, <16 x i16>) + +define <32 x i8> @lasx_xvsrln_b_h(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvsrln_b_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsrln.b.h $xr0, $xr0, $xr1 +; CHECK-NEXT: ret 
+entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvsrln.b.h(<16 x i16> %va, <16 x i16> %vb) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvsrln.h.w(<8 x i32>, <8 x i32>) + +define <16 x i16> @lasx_xvsrln_h_w(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvsrln_h_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsrln.h.w $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvsrln.h.w(<8 x i32> %va, <8 x i32> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvsrln.w.d(<4 x i64>, <4 x i64>) + +define <8 x i32> @lasx_xvsrln_w_d(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvsrln_w_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsrln.w.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvsrln.w.d(<4 x i64> %va, <4 x i64> %vb) + ret <8 x i32> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlni-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlni-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..9e7c94305630b51a371aedde0c40ee9d777e95fd --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlni-invalid-imm.ll @@ -0,0 +1,65 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvsrlni.b.h(<32 x i8>, <32 x i8>, i32) + +define <32 x i8> @lasx_xvsrlni_b_h_lo(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvsrlni.b.h: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvsrlni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 -1) + ret <32 x i8> %res +} + +define <32 x i8> @lasx_xvsrlni_b_h_hi(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvsrlni.b.h: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvsrlni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 16) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvsrlni.h.w(<16 x i16>, <16 x i16>, i32) + +define <16 x i16> @lasx_xvsrlni_h_w_lo(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvsrlni.h.w: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvsrlni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 -1) + ret <16 x i16> %res +} + +define <16 x i16> @lasx_xvsrlni_h_w_hi(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvsrlni.h.w: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvsrlni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 32) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvsrlni.w.d(<8 x i32>, <8 x i32>, i32) + +define <8 x i32> @lasx_xvsrlni_w_d_lo(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvsrlni.w.d: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvsrlni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 -1) + ret <8 x i32> %res +} + +define <8 x i32> @lasx_xvsrlni_w_d_hi(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvsrlni.w.d: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvsrlni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 64) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvsrlni.d.q(<4 x i64>, <4 x i64>, i32) + +define <4 x i64> @lasx_xvsrlni_d_q_lo(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvsrlni.d.q: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvsrlni.d.q(<4 x i64> %va, 
<4 x i64> %vb, i32 -1) + ret <4 x i64> %res +} + +define <4 x i64> @lasx_xvsrlni_d_q_hi(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvsrlni.d.q: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvsrlni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 128) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlni-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlni-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..66d8004700034e41e9430cefd15b6aa24baa89f2 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlni-non-imm.ll @@ -0,0 +1,37 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvsrlni.b.h(<32 x i8>, <32 x i8>, i32) + +define <32 x i8> @lasx_xvsrlni_b_h(<32 x i8> %va, <32 x i8> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvsrlni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 %c) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvsrlni.h.w(<16 x i16>, <16 x i16>, i32) + +define <16 x i16> @lasx_xvsrlni_h_w(<16 x i16> %va, <16 x i16> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvsrlni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 %c) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvsrlni.w.d(<8 x i32>, <8 x i32>, i32) + +define <8 x i32> @lasx_xvsrlni_w_d(<8 x i32> %va, <8 x i32> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvsrlni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 %c) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvsrlni.d.q(<4 x i64>, <4 x i64>, i32) + +define <4 x i64> @lasx_xvsrlni_d_q(<4 x i64> %va, <4 x i64> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvsrlni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 %c) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlni.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlni.ll new file mode 100644 index 0000000000000000000000000000000000000000..0301ebb195e266b177770cce420650b7fec1afb1 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlni.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvsrlni.b.h(<32 x i8>, <32 x i8>, i32) + +define <32 x i8> @lasx_xvsrlni_b_h(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvsrlni_b_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsrlni.b.h $xr0, $xr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvsrlni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 1) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvsrlni.h.w(<16 x i16>, <16 x i16>, i32) + +define <16 x i16> @lasx_xvsrlni_h_w(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvsrlni_h_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsrlni.h.w $xr0, $xr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvsrlni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 1) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvsrlni.w.d(<8 x i32>, <8 x i32>, i32) + +define <8 x i32> 
@lasx_xvsrlni_w_d(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvsrlni_w_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsrlni.w.d $xr0, $xr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvsrlni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 1) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvsrlni.d.q(<4 x i64>, <4 x i64>, i32) + +define <4 x i64> @lasx_xvsrlni_d_q(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvsrlni_d_q: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsrlni.d.q $xr0, $xr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvsrlni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 1) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlr-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlr-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..52621ddc6f49a37a91e4cd283e5b1379d50f350c --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlr-invalid-imm.ll @@ -0,0 +1,65 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvsrlri.b(<32 x i8>, i32) + +define <32 x i8> @lasx_xvsrlri_b_lo(<32 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvsrlri.b: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvsrlri.b(<32 x i8> %va, i32 -1) + ret <32 x i8> %res +} + +define <32 x i8> @lasx_xvsrlri_b_hi(<32 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvsrlri.b: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvsrlri.b(<32 x i8> %va, i32 8) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvsrlri.h(<16 x i16>, i32) + +define <16 x i16> @lasx_xvsrlri_h_lo(<16 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvsrlri.h: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvsrlri.h(<16 x i16> %va, i32 -1) + ret <16 x i16> %res +} + +define <16 x i16> @lasx_xvsrlri_h_hi(<16 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvsrlri.h: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvsrlri.h(<16 x i16> %va, i32 16) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvsrlri.w(<8 x i32>, i32) + +define <8 x i32> @lasx_xvsrlri_w_lo(<8 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvsrlri.w: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvsrlri.w(<8 x i32> %va, i32 -1) + ret <8 x i32> %res +} + +define <8 x i32> @lasx_xvsrlri_w_hi(<8 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvsrlri.w: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvsrlri.w(<8 x i32> %va, i32 32) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvsrlri.d(<4 x i64>, i32) + +define <4 x i64> @lasx_xvsrlri_d_lo(<4 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvsrlri.d: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvsrlri.d(<4 x i64> %va, i32 -1) + ret <4 x i64> %res +} + +define <4 x i64> @lasx_xvsrlri_d_hi(<4 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvsrlri.d: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvsrlri.d(<4 x i64> %va, i32 64) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlr-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlr-non-imm.ll new file mode 100644 index 
0000000000000000000000000000000000000000..5663e3475b1224a58bb27187d39c602237f9d6c4 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlr-non-imm.ll @@ -0,0 +1,37 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvsrlri.b(<32 x i8>, i32) + +define <32 x i8> @lasx_xvsrlri_b(<32 x i8> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvsrlri.b(<32 x i8> %va, i32 %b) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvsrlri.h(<16 x i16>, i32) + +define <16 x i16> @lasx_xvsrlri_h(<16 x i16> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvsrlri.h(<16 x i16> %va, i32 %b) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvsrlri.w(<8 x i32>, i32) + +define <8 x i32> @lasx_xvsrlri_w(<8 x i32> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvsrlri.w(<8 x i32> %va, i32 %b) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvsrlri.d(<4 x i64>, i32) + +define <4 x i64> @lasx_xvsrlri_d(<4 x i64> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvsrlri.d(<4 x i64> %va, i32 %b) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlr.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlr.ll new file mode 100644 index 0000000000000000000000000000000000000000..e04504158e27463ec8b1cf003d60ea5900e3c116 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlr.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvsrlr.b(<32 x i8>, <32 x i8>) + +define <32 x i8> @lasx_xvsrlr_b(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvsrlr_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsrlr.b $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvsrlr.b(<32 x i8> %va, <32 x i8> %vb) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvsrlr.h(<16 x i16>, <16 x i16>) + +define <16 x i16> @lasx_xvsrlr_h(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvsrlr_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsrlr.h $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvsrlr.h(<16 x i16> %va, <16 x i16> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvsrlr.w(<8 x i32>, <8 x i32>) + +define <8 x i32> @lasx_xvsrlr_w(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvsrlr_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsrlr.w $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvsrlr.w(<8 x i32> %va, <8 x i32> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvsrlr.d(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvsrlr_d(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvsrlr_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsrlr.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvsrlr.d(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} + +declare <32 x i8> @llvm.loongarch.lasx.xvsrlri.b(<32 x i8>, i32) + +define 
<32 x i8> @lasx_xvsrlri_b(<32 x i8> %va) nounwind { +; CHECK-LABEL: lasx_xvsrlri_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsrlri.b $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvsrlri.b(<32 x i8> %va, i32 1) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvsrlri.h(<16 x i16>, i32) + +define <16 x i16> @lasx_xvsrlri_h(<16 x i16> %va) nounwind { +; CHECK-LABEL: lasx_xvsrlri_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsrlri.h $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvsrlri.h(<16 x i16> %va, i32 1) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvsrlri.w(<8 x i32>, i32) + +define <8 x i32> @lasx_xvsrlri_w(<8 x i32> %va) nounwind { +; CHECK-LABEL: lasx_xvsrlri_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsrlri.w $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvsrlri.w(<8 x i32> %va, i32 1) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvsrlri.d(<4 x i64>, i32) + +define <4 x i64> @lasx_xvsrlri_d(<4 x i64> %va) nounwind { +; CHECK-LABEL: lasx_xvsrlri_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsrlri.d $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvsrlri.d(<4 x i64> %va, i32 1) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrn.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrn.ll new file mode 100644 index 0000000000000000000000000000000000000000..1e7df379c6e1e007cd8a80e4f5bc3426daef8c3b --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrn.ll @@ -0,0 +1,38 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvsrlrn.b.h(<16 x i16>, <16 x i16>) + +define <32 x i8> @lasx_xvsrlrn_b_h(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvsrlrn_b_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsrlrn.b.h $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvsrlrn.b.h(<16 x i16> %va, <16 x i16> %vb) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvsrlrn.h.w(<8 x i32>, <8 x i32>) + +define <16 x i16> @lasx_xvsrlrn_h_w(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvsrlrn_h_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsrlrn.h.w $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvsrlrn.h.w(<8 x i32> %va, <8 x i32> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvsrlrn.w.d(<4 x i64>, <4 x i64>) + +define <8 x i32> @lasx_xvsrlrn_w_d(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvsrlrn_w_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsrlrn.w.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvsrlrn.w.d(<4 x i64> %va, <4 x i64> %vb) + ret <8 x i32> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrni-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrni-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..2d65a75b175a3d6cd6292148dae4931da4b61982 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrni-invalid-imm.ll @@ -0,0 +1,65 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvsrlrni.b.h(<32 x i8>, <32 x i8>, i32) + +define 
<32 x i8> @lasx_xvsrlrni_b_h_lo(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvsrlrni.b.h: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvsrlrni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 -1) + ret <32 x i8> %res +} + +define <32 x i8> @lasx_xvsrlrni_b_h_hi(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvsrlrni.b.h: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvsrlrni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 16) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvsrlrni.h.w(<16 x i16>, <16 x i16>, i32) + +define <16 x i16> @lasx_xvsrlrni_h_w_lo(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvsrlrni.h.w: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvsrlrni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 -1) + ret <16 x i16> %res +} + +define <16 x i16> @lasx_xvsrlrni_h_w_hi(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvsrlrni.h.w: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvsrlrni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 32) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvsrlrni.w.d(<8 x i32>, <8 x i32>, i32) + +define <8 x i32> @lasx_xvsrlrni_w_d_lo(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvsrlrni.w.d: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvsrlrni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 -1) + ret <8 x i32> %res +} + +define <8 x i32> @lasx_xvsrlrni_w_d_hi(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvsrlrni.w.d: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvsrlrni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 64) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvsrlrni.d.q(<4 x i64>, <4 x i64>, i32) + +define <4 x i64> @lasx_xvsrlrni_d_q_lo(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvsrlrni.d.q: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvsrlrni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 -1) + ret <4 x i64> %res +} + +define <4 x i64> @lasx_xvsrlrni_d_q_hi(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvsrlrni.d.q: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvsrlrni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 128) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrni-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrni-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..82da0d21d013e0b9fc5fa49db3f41b68c775a702 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrni-non-imm.ll @@ -0,0 +1,37 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvsrlrni.b.h(<32 x i8>, <32 x i8>, i32) + +define <32 x i8> @lasx_xvsrlrni_b_h(<32 x i8> %va, <32 x i8> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvsrlrni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 %c) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvsrlrni.h.w(<16 x i16>, <16 x i16>, i32) + +define <16 x i16> @lasx_xvsrlrni_h_w(<16 x i16> %va, <16 x i16> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvsrlrni.h.w(<16 x i16> %va, 
<16 x i16> %vb, i32 %c) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvsrlrni.w.d(<8 x i32>, <8 x i32>, i32) + +define <8 x i32> @lasx_xvsrlrni_w_d(<8 x i32> %va, <8 x i32> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvsrlrni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 %c) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvsrlrni.d.q(<4 x i64>, <4 x i64>, i32) + +define <4 x i64> @lasx_xvsrlrni_d_q(<4 x i64> %va, <4 x i64> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvsrlrni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 %c) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrni.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrni.ll new file mode 100644 index 0000000000000000000000000000000000000000..56dbafe8b1ac38d1850d202eef91c3c446a41942 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-srlrni.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvsrlrni.b.h(<32 x i8>, <32 x i8>, i32) + +define <32 x i8> @lasx_xvsrlrni_b_h(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvsrlrni_b_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsrlrni.b.h $xr0, $xr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvsrlrni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 1) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvsrlrni.h.w(<16 x i16>, <16 x i16>, i32) + +define <16 x i16> @lasx_xvsrlrni_h_w(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvsrlrni_h_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsrlrni.h.w $xr0, $xr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvsrlrni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 1) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvsrlrni.w.d(<8 x i32>, <8 x i32>, i32) + +define <8 x i32> @lasx_xvsrlrni_w_d(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvsrlrni_w_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsrlrni.w.d $xr0, $xr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvsrlrni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 1) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvsrlrni.d.q(<4 x i64>, <4 x i64>, i32) + +define <4 x i64> @lasx_xvsrlrni_d_q(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvsrlrni_d_q: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsrlrni.d.q $xr0, $xr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvsrlrni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 1) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssran.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssran.ll new file mode 100644 index 0000000000000000000000000000000000000000..da1857dad14512d3bd549fa84d431b2975c97987 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssran.ll @@ -0,0 +1,74 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvssran.b.h(<16 x i16>, <16 x i16>) + +define <32 x i8> @lasx_xvssran_b_h(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvssran_b_h: +; CHECK: # 
%bb.0: # %entry +; CHECK-NEXT: xvssran.b.h $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvssran.b.h(<16 x i16> %va, <16 x i16> %vb) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvssran.h.w(<8 x i32>, <8 x i32>) + +define <16 x i16> @lasx_xvssran_h_w(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvssran_h_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvssran.h.w $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvssran.h.w(<8 x i32> %va, <8 x i32> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvssran.w.d(<4 x i64>, <4 x i64>) + +define <8 x i32> @lasx_xvssran_w_d(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvssran_w_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvssran.w.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvssran.w.d(<4 x i64> %va, <4 x i64> %vb) + ret <8 x i32> %res +} + +declare <32 x i8> @llvm.loongarch.lasx.xvssran.bu.h(<16 x i16>, <16 x i16>) + +define <32 x i8> @lasx_xvssran_bu_h(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvssran_bu_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvssran.bu.h $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvssran.bu.h(<16 x i16> %va, <16 x i16> %vb) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvssran.hu.w(<8 x i32>, <8 x i32>) + +define <16 x i16> @lasx_xvssran_hu_w(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvssran_hu_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvssran.hu.w $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvssran.hu.w(<8 x i32> %va, <8 x i32> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvssran.wu.d(<4 x i64>, <4 x i64>) + +define <8 x i32> @lasx_xvssran_wu_d(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvssran_wu_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvssran.wu.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvssran.wu.d(<4 x i64> %va, <4 x i64> %vb) + ret <8 x i32> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrani-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrani-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..e10d5d7bd4882c35c039118685db85ff22e4c850 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrani-invalid-imm.ll @@ -0,0 +1,129 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvssrani.b.h(<32 x i8>, <32 x i8>, i32) + +define <32 x i8> @lasx_xvssrani_b_h_lo(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvssrani.b.h: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvssrani.b.h(<32 x i8> %va, <32 x i8> %vb, i32 -1) + ret <32 x i8> %res +} + +define <32 x i8> @lasx_xvssrani_b_h_hi(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvssrani.b.h: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvssrani.b.h(<32 x i8> %va, <32 x i8> %vb, i32 16) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvssrani.h.w(<16 x i16>, <16 x i16>, i32) + +define <16 x i16> @lasx_xvssrani_h_w_lo(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvssrani.h.w: argument out of range +entry: + %res = call 
<16 x i16> @llvm.loongarch.lasx.xvssrani.h.w(<16 x i16> %va, <16 x i16> %vb, i32 -1) + ret <16 x i16> %res +} + +define <16 x i16> @lasx_xvssrani_h_w_hi(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvssrani.h.w: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvssrani.h.w(<16 x i16> %va, <16 x i16> %vb, i32 32) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvssrani.w.d(<8 x i32>, <8 x i32>, i32) + +define <8 x i32> @lasx_xvssrani_w_d_lo(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvssrani.w.d: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvssrani.w.d(<8 x i32> %va, <8 x i32> %vb, i32 -1) + ret <8 x i32> %res +} + +define <8 x i32> @lasx_xvssrani_w_d_hi(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvssrani.w.d: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvssrani.w.d(<8 x i32> %va, <8 x i32> %vb, i32 64) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvssrani.d.q(<4 x i64>, <4 x i64>, i32) + +define <4 x i64> @lasx_xvssrani_d_q_lo(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvssrani.d.q: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvssrani.d.q(<4 x i64> %va, <4 x i64> %vb, i32 -1) + ret <4 x i64> %res +} + +define <4 x i64> @lasx_xvssrani_d_q_hi(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvssrani.d.q: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvssrani.d.q(<4 x i64> %va, <4 x i64> %vb, i32 128) + ret <4 x i64> %res +} + +declare <32 x i8> @llvm.loongarch.lasx.xvssrani.bu.h(<32 x i8>, <32 x i8>, i32) + +define <32 x i8> @lasx_xvssrani_bu_h_lo(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvssrani.bu.h: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvssrani.bu.h(<32 x i8> %va, <32 x i8> %vb, i32 -1) + ret <32 x i8> %res +} + +define <32 x i8> @lasx_xvssrani_bu_h_hi(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvssrani.bu.h: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvssrani.bu.h(<32 x i8> %va, <32 x i8> %vb, i32 16) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvssrani.hu.w(<16 x i16>, <16 x i16>, i32) + +define <16 x i16> @lasx_xvssrani_hu_w_lo(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvssrani.hu.w: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvssrani.hu.w(<16 x i16> %va, <16 x i16> %vb, i32 -1) + ret <16 x i16> %res +} + +define <16 x i16> @lasx_xvssrani_hu_w_hi(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvssrani.hu.w: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvssrani.hu.w(<16 x i16> %va, <16 x i16> %vb, i32 32) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvssrani.wu.d(<8 x i32>, <8 x i32>, i32) + +define <8 x i32> @lasx_xvssrani_wu_d_lo(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvssrani.wu.d: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvssrani.wu.d(<8 x i32> %va, <8 x i32> %vb, i32 -1) + ret <8 x i32> %res +} + +define <8 x i32> @lasx_xvssrani_wu_d_hi(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvssrani.wu.d: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvssrani.wu.d(<8 
x i32> %va, <8 x i32> %vb, i32 64) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvssrani.du.q(<4 x i64>, <4 x i64>, i32) + +define <4 x i64> @lasx_xvssrani_du_q_lo(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvssrani.du.q: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvssrani.du.q(<4 x i64> %va, <4 x i64> %vb, i32 -1) + ret <4 x i64> %res +} + +define <4 x i64> @lasx_xvssrani_du_q_hi(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvssrani.du.q: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvssrani.du.q(<4 x i64> %va, <4 x i64> %vb, i32 128) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrani-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrani-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..a928cc2de8c81818e127aecbb38b4777268ea324 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrani-non-imm.ll @@ -0,0 +1,73 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvssrani.b.h(<32 x i8>, <32 x i8>, i32) + +define <32 x i8> @lasx_xvssrani_b_h(<32 x i8> %va, <32 x i8> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvssrani.b.h(<32 x i8> %va, <32 x i8> %vb, i32 %c) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvssrani.h.w(<16 x i16>, <16 x i16>, i32) + +define <16 x i16> @lasx_xvssrani_h_w(<16 x i16> %va, <16 x i16> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvssrani.h.w(<16 x i16> %va, <16 x i16> %vb, i32 %c) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvssrani.w.d(<8 x i32>, <8 x i32>, i32) + +define <8 x i32> @lasx_xvssrani_w_d(<8 x i32> %va, <8 x i32> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvssrani.w.d(<8 x i32> %va, <8 x i32> %vb, i32 %c) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvssrani.d.q(<4 x i64>, <4 x i64>, i32) + +define <4 x i64> @lasx_xvssrani_d_q(<4 x i64> %va, <4 x i64> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvssrani.d.q(<4 x i64> %va, <4 x i64> %vb, i32 %c) + ret <4 x i64> %res +} + +declare <32 x i8> @llvm.loongarch.lasx.xvssrani.bu.h(<32 x i8>, <32 x i8>, i32) + +define <32 x i8> @lasx_xvssrani_bu_h(<32 x i8> %va, <32 x i8> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvssrani.bu.h(<32 x i8> %va, <32 x i8> %vb, i32 %c) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvssrani.hu.w(<16 x i16>, <16 x i16>, i32) + +define <16 x i16> @lasx_xvssrani_hu_w(<16 x i16> %va, <16 x i16> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvssrani.hu.w(<16 x i16> %va, <16 x i16> %vb, i32 %c) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvssrani.wu.d(<8 x i32>, <8 x i32>, i32) + +define <8 x i32> @lasx_xvssrani_wu_d(<8 x i32> %va, <8 x i32> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvssrani.wu.d(<8 x 
i32> %va, <8 x i32> %vb, i32 %c) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvssrani.du.q(<4 x i64>, <4 x i64>, i32) + +define <4 x i64> @lasx_xvssrani_du_q(<4 x i64> %va, <4 x i64> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvssrani.du.q(<4 x i64> %va, <4 x i64> %vb, i32 %c) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrani.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrani.ll new file mode 100644 index 0000000000000000000000000000000000000000..9efa659b4a1e0f26b1c62c914fc2fd9fea673234 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrani.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvssrani.b.h(<32 x i8>, <32 x i8>, i32) + +define <32 x i8> @lasx_xvssrani_b_h(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvssrani_b_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvssrani.b.h $xr0, $xr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvssrani.b.h(<32 x i8> %va, <32 x i8> %vb, i32 1) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvssrani.h.w(<16 x i16>, <16 x i16>, i32) + +define <16 x i16> @lasx_xvssrani_h_w(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvssrani_h_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvssrani.h.w $xr0, $xr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvssrani.h.w(<16 x i16> %va, <16 x i16> %vb, i32 1) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvssrani.w.d(<8 x i32>, <8 x i32>, i32) + +define <8 x i32> @lasx_xvssrani_w_d(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvssrani_w_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvssrani.w.d $xr0, $xr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvssrani.w.d(<8 x i32> %va, <8 x i32> %vb, i32 1) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvssrani.d.q(<4 x i64>, <4 x i64>, i32) + +define <4 x i64> @lasx_xvssrani_d_q(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvssrani_d_q: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvssrani.d.q $xr0, $xr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvssrani.d.q(<4 x i64> %va, <4 x i64> %vb, i32 1) + ret <4 x i64> %res +} + +declare <32 x i8> @llvm.loongarch.lasx.xvssrani.bu.h(<32 x i8>, <32 x i8>, i32) + +define <32 x i8> @lasx_xvssrani_bu_h(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvssrani_bu_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvssrani.bu.h $xr0, $xr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvssrani.bu.h(<32 x i8> %va, <32 x i8> %vb, i32 1) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvssrani.hu.w(<16 x i16>, <16 x i16>, i32) + +define <16 x i16> @lasx_xvssrani_hu_w(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvssrani_hu_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvssrani.hu.w $xr0, $xr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvssrani.hu.w(<16 x i16> %va, <16 x i16> %vb, i32 1) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvssrani.wu.d(<8 x i32>, <8 x i32>, i32) + +define <8 x i32> @lasx_xvssrani_wu_d(<8 x i32> %va, <8 x i32> %vb) nounwind { +; 
CHECK-LABEL: lasx_xvssrani_wu_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvssrani.wu.d $xr0, $xr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvssrani.wu.d(<8 x i32> %va, <8 x i32> %vb, i32 1) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvssrani.du.q(<4 x i64>, <4 x i64>, i32) + +define <4 x i64> @lasx_xvssrani_du_q(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvssrani_du_q: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvssrani.du.q $xr0, $xr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvssrani.du.q(<4 x i64> %va, <4 x i64> %vb, i32 1) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarn.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarn.ll new file mode 100644 index 0000000000000000000000000000000000000000..b5d59ff06f4d140ac86706079ee42b6b93aa1152 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarn.ll @@ -0,0 +1,74 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvssrarn.b.h(<16 x i16>, <16 x i16>) + +define <32 x i8> @lasx_xvssrarn_b_h(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvssrarn_b_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvssrarn.b.h $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvssrarn.b.h(<16 x i16> %va, <16 x i16> %vb) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvssrarn.h.w(<8 x i32>, <8 x i32>) + +define <16 x i16> @lasx_xvssrarn_h_w(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvssrarn_h_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvssrarn.h.w $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvssrarn.h.w(<8 x i32> %va, <8 x i32> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvssrarn.w.d(<4 x i64>, <4 x i64>) + +define <8 x i32> @lasx_xvssrarn_w_d(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvssrarn_w_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvssrarn.w.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvssrarn.w.d(<4 x i64> %va, <4 x i64> %vb) + ret <8 x i32> %res +} + +declare <32 x i8> @llvm.loongarch.lasx.xvssrarn.bu.h(<16 x i16>, <16 x i16>) + +define <32 x i8> @lasx_xvssrarn_bu_h(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvssrarn_bu_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvssrarn.bu.h $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvssrarn.bu.h(<16 x i16> %va, <16 x i16> %vb) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvssrarn.hu.w(<8 x i32>, <8 x i32>) + +define <16 x i16> @lasx_xvssrarn_hu_w(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvssrarn_hu_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvssrarn.hu.w $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvssrarn.hu.w(<8 x i32> %va, <8 x i32> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvssrarn.wu.d(<4 x i64>, <4 x i64>) + +define <8 x i32> @lasx_xvssrarn_wu_d(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvssrarn_wu_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvssrarn.wu.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvssrarn.wu.d(<4 x i64> 
%va, <4 x i64> %vb) + ret <8 x i32> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarni-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarni-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..42cd6ac99754e3a21f46d65a14fbae74c30da1c1 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarni-invalid-imm.ll @@ -0,0 +1,129 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvssrarni.b.h(<32 x i8>, <32 x i8>, i32) + +define <32 x i8> @lasx_xvssrarni_b_h_lo(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvssrarni.b.h: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvssrarni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 -1) + ret <32 x i8> %res +} + +define <32 x i8> @lasx_xvssrarni_b_h_hi(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvssrarni.b.h: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvssrarni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 16) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvssrarni.h.w(<16 x i16>, <16 x i16>, i32) + +define <16 x i16> @lasx_xvssrarni_h_w_lo(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvssrarni.h.w: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvssrarni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 -1) + ret <16 x i16> %res +} + +define <16 x i16> @lasx_xvssrarni_h_w_hi(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvssrarni.h.w: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvssrarni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 32) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvssrarni.w.d(<8 x i32>, <8 x i32>, i32) + +define <8 x i32> @lasx_xvssrarni_w_d_lo(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvssrarni.w.d: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvssrarni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 -1) + ret <8 x i32> %res +} + +define <8 x i32> @lasx_xvssrarni_w_d_hi(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvssrarni.w.d: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvssrarni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 64) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvssrarni.d.q(<4 x i64>, <4 x i64>, i32) + +define <4 x i64> @lasx_xvssrarni_d_q_lo(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvssrarni.d.q: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvssrarni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 -1) + ret <4 x i64> %res +} + +define <4 x i64> @lasx_xvssrarni_d_q_hi(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvssrarni.d.q: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvssrarni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 128) + ret <4 x i64> %res +} + +declare <32 x i8> @llvm.loongarch.lasx.xvssrarni.bu.h(<32 x i8>, <32 x i8>, i32) + +define <32 x i8> @lasx_xvssrarni_bu_h_lo(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvssrarni.bu.h: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvssrarni.bu.h(<32 x i8> %va, <32 x i8> %vb, i32 -1) + ret <32 x i8> %res +} + +define <32 x i8> @lasx_xvssrarni_bu_h_hi(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK: 
llvm.loongarch.lasx.xvssrarni.bu.h: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvssrarni.bu.h(<32 x i8> %va, <32 x i8> %vb, i32 16) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvssrarni.hu.w(<16 x i16>, <16 x i16>, i32) + +define <16 x i16> @lasx_xvssrarni_hu_w_lo(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvssrarni.hu.w: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvssrarni.hu.w(<16 x i16> %va, <16 x i16> %vb, i32 -1) + ret <16 x i16> %res +} + +define <16 x i16> @lasx_xvssrarni_hu_w_hi(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvssrarni.hu.w: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvssrarni.hu.w(<16 x i16> %va, <16 x i16> %vb, i32 32) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvssrarni.wu.d(<8 x i32>, <8 x i32>, i32) + +define <8 x i32> @lasx_xvssrarni_wu_d_lo(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvssrarni.wu.d: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvssrarni.wu.d(<8 x i32> %va, <8 x i32> %vb, i32 -1) + ret <8 x i32> %res +} + +define <8 x i32> @lasx_xvssrarni_wu_d_hi(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvssrarni.wu.d: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvssrarni.wu.d(<8 x i32> %va, <8 x i32> %vb, i32 64) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvssrarni.du.q(<4 x i64>, <4 x i64>, i32) + +define <4 x i64> @lasx_xvssrarni_du_q_lo(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvssrarni.du.q: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvssrarni.du.q(<4 x i64> %va, <4 x i64> %vb, i32 -1) + ret <4 x i64> %res +} + +define <4 x i64> @lasx_xvssrarni_du_q_hi(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvssrarni.du.q: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvssrarni.du.q(<4 x i64> %va, <4 x i64> %vb, i32 128) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarni-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarni-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..f050e7d79b0f50887ab6a9436d7d2a4f55da202b --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarni-non-imm.ll @@ -0,0 +1,73 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvssrarni.b.h(<32 x i8>, <32 x i8>, i32) + +define <32 x i8> @lasx_xvssrarni_b_h(<32 x i8> %va, <32 x i8> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvssrarni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 %c) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvssrarni.h.w(<16 x i16>, <16 x i16>, i32) + +define <16 x i16> @lasx_xvssrarni_h_w(<16 x i16> %va, <16 x i16> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvssrarni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 %c) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvssrarni.w.d(<8 x i32>, <8 x i32>, i32) + +define <8 x i32> @lasx_xvssrarni_w_d(<8 x i32> %va, <8 x i32> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x 
i32> @llvm.loongarch.lasx.xvssrarni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 %c) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvssrarni.d.q(<4 x i64>, <4 x i64>, i32) + +define <4 x i64> @lasx_xvssrarni_d_q(<4 x i64> %va, <4 x i64> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvssrarni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 %c) + ret <4 x i64> %res +} + +declare <32 x i8> @llvm.loongarch.lasx.xvssrarni.bu.h(<32 x i8>, <32 x i8>, i32) + +define <32 x i8> @lasx_xvssrarni_bu_h(<32 x i8> %va, <32 x i8> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvssrarni.bu.h(<32 x i8> %va, <32 x i8> %vb, i32 %c) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvssrarni.hu.w(<16 x i16>, <16 x i16>, i32) + +define <16 x i16> @lasx_xvssrarni_hu_w(<16 x i16> %va, <16 x i16> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvssrarni.hu.w(<16 x i16> %va, <16 x i16> %vb, i32 %c) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvssrarni.wu.d(<8 x i32>, <8 x i32>, i32) + +define <8 x i32> @lasx_xvssrarni_wu_d(<8 x i32> %va, <8 x i32> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvssrarni.wu.d(<8 x i32> %va, <8 x i32> %vb, i32 %c) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvssrarni.du.q(<4 x i64>, <4 x i64>, i32) + +define <4 x i64> @lasx_xvssrarni_du_q(<4 x i64> %va, <4 x i64> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvssrarni.du.q(<4 x i64> %va, <4 x i64> %vb, i32 %c) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarni.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarni.ll new file mode 100644 index 0000000000000000000000000000000000000000..da411dad645bbdd61a4486781866df4a1f6522a3 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrarni.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvssrarni.b.h(<32 x i8>, <32 x i8>, i32) + +define <32 x i8> @lasx_xvssrarni_b_h(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvssrarni_b_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvssrarni.b.h $xr0, $xr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvssrarni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 1) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvssrarni.h.w(<16 x i16>, <16 x i16>, i32) + +define <16 x i16> @lasx_xvssrarni_h_w(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvssrarni_h_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvssrarni.h.w $xr0, $xr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvssrarni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 1) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvssrarni.w.d(<8 x i32>, <8 x i32>, i32) + +define <8 x i32> @lasx_xvssrarni_w_d(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvssrarni_w_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvssrarni.w.d $xr0, $xr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> 
@llvm.loongarch.lasx.xvssrarni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 1) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvssrarni.d.q(<4 x i64>, <4 x i64>, i32) + +define <4 x i64> @lasx_xvssrarni_d_q(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvssrarni_d_q: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvssrarni.d.q $xr0, $xr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvssrarni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 1) + ret <4 x i64> %res +} + +declare <32 x i8> @llvm.loongarch.lasx.xvssrarni.bu.h(<32 x i8>, <32 x i8>, i32) + +define <32 x i8> @lasx_xvssrarni_bu_h(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvssrarni_bu_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvssrarni.bu.h $xr0, $xr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvssrarni.bu.h(<32 x i8> %va, <32 x i8> %vb, i32 1) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvssrarni.hu.w(<16 x i16>, <16 x i16>, i32) + +define <16 x i16> @lasx_xvssrarni_hu_w(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvssrarni_hu_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvssrarni.hu.w $xr0, $xr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvssrarni.hu.w(<16 x i16> %va, <16 x i16> %vb, i32 1) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvssrarni.wu.d(<8 x i32>, <8 x i32>, i32) + +define <8 x i32> @lasx_xvssrarni_wu_d(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvssrarni_wu_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvssrarni.wu.d $xr0, $xr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvssrarni.wu.d(<8 x i32> %va, <8 x i32> %vb, i32 1) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvssrarni.du.q(<4 x i64>, <4 x i64>, i32) + +define <4 x i64> @lasx_xvssrarni_du_q(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvssrarni_du_q: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvssrarni.du.q $xr0, $xr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvssrarni.du.q(<4 x i64> %va, <4 x i64> %vb, i32 1) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrln.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrln.ll new file mode 100644 index 0000000000000000000000000000000000000000..c60b5bdf81a03a001528d2b970c74cc55befefdb --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrln.ll @@ -0,0 +1,74 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvssrln.b.h(<16 x i16>, <16 x i16>) + +define <32 x i8> @lasx_xvssrln_b_h(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvssrln_b_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvssrln.b.h $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvssrln.b.h(<16 x i16> %va, <16 x i16> %vb) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvssrln.h.w(<8 x i32>, <8 x i32>) + +define <16 x i16> @lasx_xvssrln_h_w(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvssrln_h_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvssrln.h.w $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvssrln.h.w(<8 x i32> %va, <8 x i32> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvssrln.w.d(<4 x i64>, 
<4 x i64>) + +define <8 x i32> @lasx_xvssrln_w_d(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvssrln_w_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvssrln.w.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvssrln.w.d(<4 x i64> %va, <4 x i64> %vb) + ret <8 x i32> %res +} + +declare <32 x i8> @llvm.loongarch.lasx.xvssrln.bu.h(<16 x i16>, <16 x i16>) + +define <32 x i8> @lasx_xvssrln_bu_h(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvssrln_bu_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvssrln.bu.h $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvssrln.bu.h(<16 x i16> %va, <16 x i16> %vb) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvssrln.hu.w(<8 x i32>, <8 x i32>) + +define <16 x i16> @lasx_xvssrln_hu_w(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvssrln_hu_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvssrln.hu.w $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvssrln.hu.w(<8 x i32> %va, <8 x i32> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvssrln.wu.d(<4 x i64>, <4 x i64>) + +define <8 x i32> @lasx_xvssrln_wu_d(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvssrln_wu_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvssrln.wu.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvssrln.wu.d(<4 x i64> %va, <4 x i64> %vb) + ret <8 x i32> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlni-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlni-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..26be21a83aa4da6e7f77094f3bb0d38639251cf8 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlni-invalid-imm.ll @@ -0,0 +1,129 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvssrlni.b.h(<32 x i8>, <32 x i8>, i32) + +define <32 x i8> @lasx_xvssrlni_b_h_lo(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvssrlni.b.h: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 -1) + ret <32 x i8> %res +} + +define <32 x i8> @lasx_xvssrlni_b_h_hi(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvssrlni.b.h: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 16) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvssrlni.h.w(<16 x i16>, <16 x i16>, i32) + +define <16 x i16> @lasx_xvssrlni_h_w_lo(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvssrlni.h.w: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 -1) + ret <16 x i16> %res +} + +define <16 x i16> @lasx_xvssrlni_h_w_hi(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvssrlni.h.w: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 32) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvssrlni.w.d(<8 x i32>, <8 x i32>, i32) + +define <8 x i32> @lasx_xvssrlni_w_d_lo(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvssrlni.w.d: argument out of range +entry: + %res = call <8 x i32> 
@llvm.loongarch.lasx.xvssrlni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 -1) + ret <8 x i32> %res +} + +define <8 x i32> @lasx_xvssrlni_w_d_hi(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvssrlni.w.d: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 64) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvssrlni.d.q(<4 x i64>, <4 x i64>, i32) + +define <4 x i64> @lasx_xvssrlni_d_q_lo(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvssrlni.d.q: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvssrlni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 -1) + ret <4 x i64> %res +} + +define <4 x i64> @lasx_xvssrlni_d_q_hi(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvssrlni.d.q: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvssrlni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 128) + ret <4 x i64> %res +} + +declare <32 x i8> @llvm.loongarch.lasx.xvssrlni.bu.h(<32 x i8>, <32 x i8>, i32) + +define <32 x i8> @lasx_xvssrlni_bu_h_lo(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvssrlni.bu.h: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlni.bu.h(<32 x i8> %va, <32 x i8> %vb, i32 -1) + ret <32 x i8> %res +} + +define <32 x i8> @lasx_xvssrlni_bu_h_hi(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvssrlni.bu.h: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlni.bu.h(<32 x i8> %va, <32 x i8> %vb, i32 16) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvssrlni.hu.w(<16 x i16>, <16 x i16>, i32) + +define <16 x i16> @lasx_xvssrlni_hu_w_lo(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvssrlni.hu.w: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlni.hu.w(<16 x i16> %va, <16 x i16> %vb, i32 -1) + ret <16 x i16> %res +} + +define <16 x i16> @lasx_xvssrlni_hu_w_hi(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvssrlni.hu.w: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlni.hu.w(<16 x i16> %va, <16 x i16> %vb, i32 32) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvssrlni.wu.d(<8 x i32>, <8 x i32>, i32) + +define <8 x i32> @lasx_xvssrlni_wu_d_lo(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvssrlni.wu.d: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlni.wu.d(<8 x i32> %va, <8 x i32> %vb, i32 -1) + ret <8 x i32> %res +} + +define <8 x i32> @lasx_xvssrlni_wu_d_hi(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvssrlni.wu.d: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlni.wu.d(<8 x i32> %va, <8 x i32> %vb, i32 64) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvssrlni.du.q(<4 x i64>, <4 x i64>, i32) + +define <4 x i64> @lasx_xvssrlni_du_q_lo(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvssrlni.du.q: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvssrlni.du.q(<4 x i64> %va, <4 x i64> %vb, i32 -1) + ret <4 x i64> %res +} + +define <4 x i64> @lasx_xvssrlni_du_q_hi(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvssrlni.du.q: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvssrlni.du.q(<4 x i64> %va, <4 
x i64> %vb, i32 128)
+  ret <4 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlni-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlni-non-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..72da2a746dd5d69106b7c33bc5a7d79f3340a3af
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlni-non-imm.ll
@@ -0,0 +1,73 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s
+
+declare <32 x i8> @llvm.loongarch.lasx.xvssrlni.b.h(<32 x i8>, <32 x i8>, i32)
+
+define <32 x i8> @lasx_xvssrlni_b_h(<32 x i8> %va, <32 x i8> %vb, i32 %c) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 %c)
+  ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvssrlni.h.w(<16 x i16>, <16 x i16>, i32)
+
+define <16 x i16> @lasx_xvssrlni_h_w(<16 x i16> %va, <16 x i16> %vb, i32 %c) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 %c)
+  ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvssrlni.w.d(<8 x i32>, <8 x i32>, i32)
+
+define <8 x i32> @lasx_xvssrlni_w_d(<8 x i32> %va, <8 x i32> %vb, i32 %c) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 %c)
+  ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvssrlni.d.q(<4 x i64>, <4 x i64>, i32)
+
+define <4 x i64> @lasx_xvssrlni_d_q(<4 x i64> %va, <4 x i64> %vb, i32 %c) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvssrlni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 %c)
+  ret <4 x i64> %res
+}
+
+declare <32 x i8> @llvm.loongarch.lasx.xvssrlni.bu.h(<32 x i8>, <32 x i8>, i32)
+
+define <32 x i8> @lasx_xvssrlni_bu_h(<32 x i8> %va, <32 x i8> %vb, i32 %c) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlni.bu.h(<32 x i8> %va, <32 x i8> %vb, i32 %c)
+  ret <32 x i8> %res
+}
+
+declare <16 x i16> @llvm.loongarch.lasx.xvssrlni.hu.w(<16 x i16>, <16 x i16>, i32)
+
+define <16 x i16> @lasx_xvssrlni_hu_w(<16 x i16> %va, <16 x i16> %vb, i32 %c) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlni.hu.w(<16 x i16> %va, <16 x i16> %vb, i32 %c)
+  ret <16 x i16> %res
+}
+
+declare <8 x i32> @llvm.loongarch.lasx.xvssrlni.wu.d(<8 x i32>, <8 x i32>, i32)
+
+define <8 x i32> @lasx_xvssrlni_wu_d(<8 x i32> %va, <8 x i32> %vb, i32 %c) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlni.wu.d(<8 x i32> %va, <8 x i32> %vb, i32 %c)
+  ret <8 x i32> %res
+}
+
+declare <4 x i64> @llvm.loongarch.lasx.xvssrlni.du.q(<4 x i64>, <4 x i64>, i32)
+
+define <4 x i64> @lasx_xvssrlni_du_q(<4 x i64> %va, <4 x i64> %vb, i32 %c) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <4 x i64> @llvm.loongarch.lasx.xvssrlni.du.q(<4 x i64> %va, <4 x i64> %vb, i32 %c)
+  ret <4 x i64> %res
+}
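Taken together, the two files above pin down the immarg contract for xvssrlni: the shift amount must be a compile-time constant, and the invalid-imm tests bound it (for the .b.h variant, -1 and 16 are rejected, so the accepted range is presumably 0 through 15). As a hedged illustration only, not part of this patch, the largest immediate those checks would still accept looks like:

; Hypothetical example, assuming the [0, 15] range implied by the
; invalid-imm tests above (which reject -1 and 16 for xvssrlni.b.h).
declare <32 x i8> @llvm.loongarch.lasx.xvssrlni.b.h(<32 x i8>, <32 x i8>, i32)

define <32 x i8> @lasx_xvssrlni_b_h_max(<32 x i8> %va, <32 x i8> %vb) nounwind {
entry:
  ; i32 15 is the boundary value: one below the first rejected immediate.
  %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 15)
  ret <32 x i8> %res
}

diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlni.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlni.ll
new file mode 100644
index 0000000000000000000000000000000000000000..e57dd426bde8ce0eac17b7ee58f20c1e5983a99c
--- /dev/null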
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlni.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvssrlni.b.h(<32 x i8>, <32 x i8>, i32) + +define <32 x i8> @lasx_xvssrlni_b_h(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvssrlni_b_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvssrlni.b.h $xr0, $xr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 1) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvssrlni.h.w(<16 x i16>, <16 x i16>, i32) + +define <16 x i16> @lasx_xvssrlni_h_w(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvssrlni_h_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvssrlni.h.w $xr0, $xr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 1) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvssrlni.w.d(<8 x i32>, <8 x i32>, i32) + +define <8 x i32> @lasx_xvssrlni_w_d(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvssrlni_w_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvssrlni.w.d $xr0, $xr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 1) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvssrlni.d.q(<4 x i64>, <4 x i64>, i32) + +define <4 x i64> @lasx_xvssrlni_d_q(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvssrlni_d_q: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvssrlni.d.q $xr0, $xr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvssrlni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 1) + ret <4 x i64> %res +} + +declare <32 x i8> @llvm.loongarch.lasx.xvssrlni.bu.h(<32 x i8>, <32 x i8>, i32) + +define <32 x i8> @lasx_xvssrlni_bu_h(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvssrlni_bu_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvssrlni.bu.h $xr0, $xr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlni.bu.h(<32 x i8> %va, <32 x i8> %vb, i32 1) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvssrlni.hu.w(<16 x i16>, <16 x i16>, i32) + +define <16 x i16> @lasx_xvssrlni_hu_w(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvssrlni_hu_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvssrlni.hu.w $xr0, $xr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlni.hu.w(<16 x i16> %va, <16 x i16> %vb, i32 1) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvssrlni.wu.d(<8 x i32>, <8 x i32>, i32) + +define <8 x i32> @lasx_xvssrlni_wu_d(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvssrlni_wu_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvssrlni.wu.d $xr0, $xr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlni.wu.d(<8 x i32> %va, <8 x i32> %vb, i32 1) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvssrlni.du.q(<4 x i64>, <4 x i64>, i32) + +define <4 x i64> @lasx_xvssrlni_du_q(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvssrlni_du_q: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvssrlni.du.q $xr0, $xr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvssrlni.du.q(<4 x i64> %va, <4 x i64> %vb, i32 1) + ret <4 x 
i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrn.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrn.ll new file mode 100644 index 0000000000000000000000000000000000000000..774cf1bd5e84977362ee25eb7f7c731c3e68e890 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrn.ll @@ -0,0 +1,74 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvssrlrn.b.h(<16 x i16>, <16 x i16>) + +define <32 x i8> @lasx_xvssrlrn_b_h(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvssrlrn_b_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvssrlrn.b.h $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.b.h(<16 x i16> %va, <16 x i16> %vb) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvssrlrn.h.w(<8 x i32>, <8 x i32>) + +define <16 x i16> @lasx_xvssrlrn_h_w(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvssrlrn_h_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvssrlrn.h.w $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.h.w(<8 x i32> %va, <8 x i32> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvssrlrn.w.d(<4 x i64>, <4 x i64>) + +define <8 x i32> @lasx_xvssrlrn_w_d(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvssrlrn_w_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvssrlrn.w.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.w.d(<4 x i64> %va, <4 x i64> %vb) + ret <8 x i32> %res +} + +declare <32 x i8> @llvm.loongarch.lasx.xvssrlrn.bu.h(<16 x i16>, <16 x i16>) + +define <32 x i8> @lasx_xvssrlrn_bu_h(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvssrlrn_bu_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvssrlrn.bu.h $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.bu.h(<16 x i16> %va, <16 x i16> %vb) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvssrlrn.hu.w(<8 x i32>, <8 x i32>) + +define <16 x i16> @lasx_xvssrlrn_hu_w(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvssrlrn_hu_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvssrlrn.hu.w $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.hu.w(<8 x i32> %va, <8 x i32> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvssrlrn.wu.d(<4 x i64>, <4 x i64>) + +define <8 x i32> @lasx_xvssrlrn_wu_d(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvssrlrn_wu_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvssrlrn.wu.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.wu.d(<4 x i64> %va, <4 x i64> %vb) + ret <8 x i32> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrni-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrni-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..cd778e2c0627d2a2c2f1856b82705f4a562b047c --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrni-invalid-imm.ll @@ -0,0 +1,129 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvssrlrni.b.h(<32 x i8>, <32 x i8>, i32) + +define <32 x i8> @lasx_xvssrlrni_b_h_lo(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK: 
llvm.loongarch.lasx.xvssrlrni.b.h: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 -1) + ret <32 x i8> %res +} + +define <32 x i8> @lasx_xvssrlrni_b_h_hi(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvssrlrni.b.h: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 16) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvssrlrni.h.w(<16 x i16>, <16 x i16>, i32) + +define <16 x i16> @lasx_xvssrlrni_h_w_lo(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvssrlrni.h.w: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 -1) + ret <16 x i16> %res +} + +define <16 x i16> @lasx_xvssrlrni_h_w_hi(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvssrlrni.h.w: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 32) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvssrlrni.w.d(<8 x i32>, <8 x i32>, i32) + +define <8 x i32> @lasx_xvssrlrni_w_d_lo(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvssrlrni.w.d: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 -1) + ret <8 x i32> %res +} + +define <8 x i32> @lasx_xvssrlrni_w_d_hi(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvssrlrni.w.d: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 64) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvssrlrni.d.q(<4 x i64>, <4 x i64>, i32) + +define <4 x i64> @lasx_xvssrlrni_d_q_lo(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvssrlrni.d.q: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 -1) + ret <4 x i64> %res +} + +define <4 x i64> @lasx_xvssrlrni_d_q_hi(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvssrlrni.d.q: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 128) + ret <4 x i64> %res +} + +declare <32 x i8> @llvm.loongarch.lasx.xvssrlrni.bu.h(<32 x i8>, <32 x i8>, i32) + +define <32 x i8> @lasx_xvssrlrni_bu_h_lo(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvssrlrni.bu.h: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.bu.h(<32 x i8> %va, <32 x i8> %vb, i32 -1) + ret <32 x i8> %res +} + +define <32 x i8> @lasx_xvssrlrni_bu_h_hi(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvssrlrni.bu.h: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.bu.h(<32 x i8> %va, <32 x i8> %vb, i32 16) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvssrlrni.hu.w(<16 x i16>, <16 x i16>, i32) + +define <16 x i16> @lasx_xvssrlrni_hu_w_lo(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvssrlrni.hu.w: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.hu.w(<16 x i16> %va, <16 x i16> %vb, i32 -1) + ret <16 x i16> %res +} + +define <16 x i16> @lasx_xvssrlrni_hu_w_hi(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK: 
llvm.loongarch.lasx.xvssrlrni.hu.w: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.hu.w(<16 x i16> %va, <16 x i16> %vb, i32 32) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvssrlrni.wu.d(<8 x i32>, <8 x i32>, i32) + +define <8 x i32> @lasx_xvssrlrni_wu_d_lo(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvssrlrni.wu.d: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.wu.d(<8 x i32> %va, <8 x i32> %vb, i32 -1) + ret <8 x i32> %res +} + +define <8 x i32> @lasx_xvssrlrni_wu_d_hi(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvssrlrni.wu.d: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.wu.d(<8 x i32> %va, <8 x i32> %vb, i32 64) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvssrlrni.du.q(<4 x i64>, <4 x i64>, i32) + +define <4 x i64> @lasx_xvssrlrni_du_q_lo(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvssrlrni.du.q: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.du.q(<4 x i64> %va, <4 x i64> %vb, i32 -1) + ret <4 x i64> %res +} + +define <4 x i64> @lasx_xvssrlrni_du_q_hi(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK: llvm.loongarch.lasx.xvssrlrni.du.q: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.du.q(<4 x i64> %va, <4 x i64> %vb, i32 128) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrni-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrni-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..a10c543291499f13896ed6903c6b3d97a718551f --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrni-non-imm.ll @@ -0,0 +1,73 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvssrlrni.b.h(<32 x i8>, <32 x i8>, i32) + +define <32 x i8> @lasx_xvssrlrni_b_h(<32 x i8> %va, <32 x i8> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 %c) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvssrlrni.h.w(<16 x i16>, <16 x i16>, i32) + +define <16 x i16> @lasx_xvssrlrni_h_w(<16 x i16> %va, <16 x i16> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 %c) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvssrlrni.w.d(<8 x i32>, <8 x i32>, i32) + +define <8 x i32> @lasx_xvssrlrni_w_d(<8 x i32> %va, <8 x i32> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 %c) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvssrlrni.d.q(<4 x i64>, <4 x i64>, i32) + +define <4 x i64> @lasx_xvssrlrni_d_q(<4 x i64> %va, <4 x i64> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 %c) + ret <4 x i64> %res +} + +declare <32 x i8> @llvm.loongarch.lasx.xvssrlrni.bu.h(<32 x i8>, <32 x i8>, i32) + +define <32 x i8> @lasx_xvssrlrni_bu_h(<32 x i8> %va, <32 x i8> %vb, i32 %c) nounwind { +; CHECK: immarg operand has 
non-immediate parameter +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.bu.h(<32 x i8> %va, <32 x i8> %vb, i32 %c) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvssrlrni.hu.w(<16 x i16>, <16 x i16>, i32) + +define <16 x i16> @lasx_xvssrlrni_hu_w(<16 x i16> %va, <16 x i16> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.hu.w(<16 x i16> %va, <16 x i16> %vb, i32 %c) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvssrlrni.wu.d(<8 x i32>, <8 x i32>, i32) + +define <8 x i32> @lasx_xvssrlrni_wu_d(<8 x i32> %va, <8 x i32> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.wu.d(<8 x i32> %va, <8 x i32> %vb, i32 %c) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvssrlrni.du.q(<4 x i64>, <4 x i64>, i32) + +define <4 x i64> @lasx_xvssrlrni_du_q(<4 x i64> %va, <4 x i64> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.du.q(<4 x i64> %va, <4 x i64> %vb, i32 %c) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrni.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrni.ll new file mode 100644 index 0000000000000000000000000000000000000000..9a80516d8d7838e178e1b9eed629283e18dddb1f --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssrlrni.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvssrlrni.b.h(<32 x i8>, <32 x i8>, i32) + +define <32 x i8> @lasx_xvssrlrni_b_h(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvssrlrni_b_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvssrlrni.b.h $xr0, $xr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.b.h(<32 x i8> %va, <32 x i8> %vb, i32 1) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvssrlrni.h.w(<16 x i16>, <16 x i16>, i32) + +define <16 x i16> @lasx_xvssrlrni_h_w(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvssrlrni_h_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvssrlrni.h.w $xr0, $xr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.h.w(<16 x i16> %va, <16 x i16> %vb, i32 1) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvssrlrni.w.d(<8 x i32>, <8 x i32>, i32) + +define <8 x i32> @lasx_xvssrlrni_w_d(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvssrlrni_w_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvssrlrni.w.d $xr0, $xr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.w.d(<8 x i32> %va, <8 x i32> %vb, i32 1) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvssrlrni.d.q(<4 x i64>, <4 x i64>, i32) + +define <4 x i64> @lasx_xvssrlrni_d_q(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvssrlrni_d_q: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvssrlrni.d.q $xr0, $xr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.d.q(<4 x i64> %va, <4 x i64> %vb, i32 1) + ret <4 x i64> %res +} + +declare <32 x i8> @llvm.loongarch.lasx.xvssrlrni.bu.h(<32 x i8>, <32 x i8>, i32) + +define <32 x i8> @lasx_xvssrlrni_bu_h(<32 x i8> %va, <32 x i8> %vb) nounwind { 
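+; (Hedged note, not from the original patch: the .bu.h form presumably
+; saturates the narrowed results to the unsigned 8-bit range [0, 255]
+; rather than the signed [-128, 127] of .b.h; the immediate range and
+; register usage are otherwise the same as the signed variant above.)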
+; CHECK-LABEL: lasx_xvssrlrni_bu_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvssrlrni.bu.h $xr0, $xr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.bu.h(<32 x i8> %va, <32 x i8> %vb, i32 1) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvssrlrni.hu.w(<16 x i16>, <16 x i16>, i32) + +define <16 x i16> @lasx_xvssrlrni_hu_w(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvssrlrni_hu_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvssrlrni.hu.w $xr0, $xr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.hu.w(<16 x i16> %va, <16 x i16> %vb, i32 1) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvssrlrni.wu.d(<8 x i32>, <8 x i32>, i32) + +define <8 x i32> @lasx_xvssrlrni_wu_d(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvssrlrni_wu_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvssrlrni.wu.d $xr0, $xr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.wu.d(<8 x i32> %va, <8 x i32> %vb, i32 1) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvssrlrni.du.q(<4 x i64>, <4 x i64>, i32) + +define <4 x i64> @lasx_xvssrlrni_du_q(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvssrlrni_du_q: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvssrlrni.du.q $xr0, $xr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.du.q(<4 x i64> %va, <4 x i64> %vb, i32 1) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssub.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssub.ll new file mode 100644 index 0000000000000000000000000000000000000000..cd3ccd9f52625048fa1e7d1441700dc6e323694f --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-ssub.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvssub.b(<32 x i8>, <32 x i8>) + +define <32 x i8> @lasx_xvssub_b(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvssub_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvssub.b $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvssub.b(<32 x i8> %va, <32 x i8> %vb) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvssub.h(<16 x i16>, <16 x i16>) + +define <16 x i16> @lasx_xvssub_h(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvssub_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvssub.h $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvssub.h(<16 x i16> %va, <16 x i16> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvssub.w(<8 x i32>, <8 x i32>) + +define <8 x i32> @lasx_xvssub_w(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvssub_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvssub.w $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvssub.w(<8 x i32> %va, <8 x i32> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvssub.d(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvssub_d(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvssub_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvssub.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvssub.d(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} + +declare 
<32 x i8> @llvm.loongarch.lasx.xvssub.bu(<32 x i8>, <32 x i8>) + +define <32 x i8> @lasx_xvssub_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvssub_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvssub.bu $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvssub.bu(<32 x i8> %va, <32 x i8> %vb) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvssub.hu(<16 x i16>, <16 x i16>) + +define <16 x i16> @lasx_xvssub_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvssub_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvssub.hu $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvssub.hu(<16 x i16> %va, <16 x i16> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvssub.wu(<8 x i32>, <8 x i32>) + +define <8 x i32> @lasx_xvssub_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvssub_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvssub.wu $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvssub.wu(<8 x i32> %va, <8 x i32> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvssub.du(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvssub_du(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvssub_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvssub.du $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvssub.du(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-st-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-st-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..0177f2b77b939f5490e20e3b06aa00b36b0b475a --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-st-invalid-imm.ll @@ -0,0 +1,17 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare void @llvm.loongarch.lasx.xvst(<32 x i8>, i8*, i32) + +define void @lasx_xvst_lo(<32 x i8> %va, i8* %p) nounwind { +; CHECK: llvm.loongarch.lasx.xvst: argument out of range +entry: + call void @llvm.loongarch.lasx.xvst(<32 x i8> %va, i8* %p, i32 -2049) + ret void +} + +define void @lasx_xvst_hi(<32 x i8> %va, i8* %p) nounwind { +; CHECK: llvm.loongarch.lasx.xvst: argument out of range +entry: + call void @llvm.loongarch.lasx.xvst(<32 x i8> %va, i8* %p, i32 2048) + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-st-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-st-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..c19207aad6b8cb30dd2dcba85442e1eb49bafa07 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-st-non-imm.ll @@ -0,0 +1,10 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare void @llvm.loongarch.lasx.xvst(<32 x i8>, i8*, i32) + +define void @lasx_xvst(<32 x i8> %va, i8* %p, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + call void @llvm.loongarch.lasx.xvst(<32 x i8> %va, i8* %p, i32 %b) + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-st.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-st.ll new file mode 100644 index 0000000000000000000000000000000000000000..b69e7b813f0c1d65d0c7ac5a1323b34e0415b2f7 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-st.ll @@ -0,0 +1,27 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc 
--mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare void @llvm.loongarch.lasx.xvst(<32 x i8>, i8*, i32) + +define void @lasx_xvst(<32 x i8> %va, i8* %p) nounwind { +; CHECK-LABEL: lasx_xvst: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvst $xr0, $a0, 1 +; CHECK-NEXT: ret +entry: + call void @llvm.loongarch.lasx.xvst(<32 x i8> %va, i8* %p, i32 1) + ret void +} + +declare void @llvm.loongarch.lasx.xvstx(<32 x i8>, i8*, i64) + +define void @lasx_xvstx(<32 x i8> %va, i8* %p) nounwind { +; CHECK-LABEL: lasx_xvstx: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: ori $a1, $zero, 1 +; CHECK-NEXT: xvstx $xr0, $a0, $a1 +; CHECK-NEXT: ret +entry: + call void @llvm.loongarch.lasx.xvstx(<32 x i8> %va, i8* %p, i64 1) + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-stelm-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-stelm-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..0ea2484e090df094460a5d0fd279384617156b34 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-stelm-invalid-imm.ll @@ -0,0 +1,121 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare void @llvm.loongarch.lasx.xvstelm.b(<32 x i8>, i8*, i32, i32) + +define void @lasx_xvstelm_b_lo(<32 x i8> %va, i8* %p) nounwind { +; CHECK: llvm.loongarch.lasx.xvstelm.b: argument out of range +entry: + call void @llvm.loongarch.lasx.xvstelm.b(<32 x i8> %va, i8* %p, i32 -129, i32 1) + ret void +} + +define void @lasx_xvstelm_b_hi(<32 x i8> %va, i8* %p) nounwind { +; CHECK: llvm.loongarch.lasx.xvstelm.b: argument out of range +entry: + call void @llvm.loongarch.lasx.xvstelm.b(<32 x i8> %va, i8* %p, i32 128, i32 1) + ret void +} + +define void @lasx_xvstelm_b_idx_lo(<32 x i8> %va, i8* %p) nounwind { +; CHECK: llvm.loongarch.lasx.xvstelm.b: argument out of range +entry: + call void @llvm.loongarch.lasx.xvstelm.b(<32 x i8> %va, i8* %p, i32 1, i32 -1) + ret void +} + +define void @lasx_xvstelm_b_idx_hi(<32 x i8> %va, i8* %p) nounwind { +; CHECK: llvm.loongarch.lasx.xvstelm.b: argument out of range +entry: + call void @llvm.loongarch.lasx.xvstelm.b(<32 x i8> %va, i8* %p, i32 1, i32 32) + ret void +} + +declare void @llvm.loongarch.lasx.xvstelm.h(<16 x i16>, i8*, i32, i32) + +define void @lasx_xvstelm_h_lo(<16 x i16> %va, i8* %p) nounwind { +; CHECK: llvm.loongarch.lasx.xvstelm.h: argument out of range or not a multiple of 2. +entry: + call void @llvm.loongarch.lasx.xvstelm.h(<16 x i16> %va, i8* %p, i32 -258, i32 1) + ret void +} + +define void @lasx_xvstelm_h_hi(<16 x i16> %va, i8* %p) nounwind { +; CHECK: llvm.loongarch.lasx.xvstelm.h: argument out of range or not a multiple of 2. +entry: + call void @llvm.loongarch.lasx.xvstelm.h(<16 x i16> %va, i8* %p, i32 256, i32 1) + ret void +} + +define void @lasx_xvstelm_h_idx_lo(<16 x i16> %va, i8* %p) nounwind { +; CHECK: llvm.loongarch.lasx.xvstelm.h: argument out of range or not a multiple of 2. +entry: + call void @llvm.loongarch.lasx.xvstelm.h(<16 x i16> %va, i8* %p, i32 2, i32 -1) + ret void +} + +define void @lasx_xvstelm_h_idx_hi(<16 x i16> %va, i8* %p) nounwind { +; CHECK: llvm.loongarch.lasx.xvstelm.h: argument out of range or not a multiple of 2. +entry: + call void @llvm.loongarch.lasx.xvstelm.h(<16 x i16> %va, i8* %p, i32 2, i32 16) + ret void +} + +declare void @llvm.loongarch.lasx.xvstelm.w(<8 x i32>, i8*, i32, i32) + +define void @lasx_xvstelm_w_lo(<8 x i32> %va, i8* %p) nounwind { +; CHECK: llvm.loongarch.lasx.xvstelm.w: argument out of range or not a multiple of 4. 
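+; (Hedged note, not from the original patch: the offset is presumably a
+; signed 8-bit immediate scaled by the element size, so for .w it must be
+; a multiple of 4 in [-512, 508]; the -516 here and the 512 in the _hi
+; case below both fall outside that window.)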
+entry: + call void @llvm.loongarch.lasx.xvstelm.w(<8 x i32> %va, i8* %p, i32 -516, i32 1) + ret void +} + +define void @lasx_xvstelm_w_hi(<8 x i32> %va, i8* %p) nounwind { +; CHECK: llvm.loongarch.lasx.xvstelm.w: argument out of range or not a multiple of 4. +entry: + call void @llvm.loongarch.lasx.xvstelm.w(<8 x i32> %va, i8* %p, i32 512, i32 1) + ret void +} + +define void @lasx_xvstelm_w_idx_lo(<8 x i32> %va, i8* %p) nounwind { +; CHECK: llvm.loongarch.lasx.xvstelm.w: argument out of range or not a multiple of 4. +entry: + call void @llvm.loongarch.lasx.xvstelm.w(<8 x i32> %va, i8* %p, i32 4, i32 -1) + ret void +} + +define void @lasx_xvstelm_w_idx_hi(<8 x i32> %va, i8* %p) nounwind { +; CHECK: llvm.loongarch.lasx.xvstelm.w: argument out of range or not a multiple of 4. +entry: + call void @llvm.loongarch.lasx.xvstelm.w(<8 x i32> %va, i8* %p, i32 4, i32 8) + ret void +} + +declare void @llvm.loongarch.lasx.xvstelm.d(<4 x i64>, i8*, i32, i32) + +define void @lasx_xvstelm_d_lo(<4 x i64> %va, i8* %p) nounwind { +; CHECK: llvm.loongarch.lasx.xvstelm.d: argument out of range or not a multiple of 8. +entry: + call void @llvm.loongarch.lasx.xvstelm.d(<4 x i64> %va, i8* %p, i32 -1032, i32 1) + ret void +} + +define void @lasx_xvstelm_d_hi(<4 x i64> %va, i8* %p) nounwind { +; CHECK: llvm.loongarch.lasx.xvstelm.d: argument out of range or not a multiple of 8. +entry: + call void @llvm.loongarch.lasx.xvstelm.d(<4 x i64> %va, i8* %p, i32 1024, i32 1) + ret void +} + +define void @lasx_xvstelm_d_idx_lo(<4 x i64> %va, i8* %p) nounwind { +; CHECK: llvm.loongarch.lasx.xvstelm.d: argument out of range or not a multiple of 8. +entry: + call void @llvm.loongarch.lasx.xvstelm.d(<4 x i64> %va, i8* %p, i32 8, i32 -1) + ret void +} + +define void @lasx_xvstelm_d_idx_hi(<4 x i64> %va, i8* %p) nounwind { +; CHECK: llvm.loongarch.lasx.xvstelm.d: argument out of range or not a multiple of 8. 
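+; (Hedged note, not from the original patch: the trailing index selects
+; one of the four i64 elements of the 256-bit vector, so only 0 through 3
+; are accepted; the i32 4 below and the -1 in the _idx_lo case are both
+; diagnosed with the same message as an out-of-range offset.)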
+entry: + call void @llvm.loongarch.lasx.xvstelm.d(<4 x i64> %va, i8* %p, i32 8, i32 4) + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-stelm-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-stelm-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..42c7c0da174696ca381b88672b2d16036d667076 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-stelm-non-imm.ll @@ -0,0 +1,65 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare void @llvm.loongarch.lasx.xvstelm.b(<32 x i8>, i8*, i32, i32) + +define void @lasx_xvstelm_b(<32 x i8> %va, i8* %p, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + call void @llvm.loongarch.lasx.xvstelm.b(<32 x i8> %va, i8* %p, i32 %b, i32 1) + ret void +} + +define void @lasx_xvstelm_b_idx(<32 x i8> %va, i8* %p, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + call void @llvm.loongarch.lasx.xvstelm.b(<32 x i8> %va, i8* %p, i32 1, i32 %b) + ret void +} + +declare void @llvm.loongarch.lasx.xvstelm.h(<16 x i16>, i8*, i32, i32) + +define void @lasx_xvstelm_h(<16 x i16> %va, i8* %p, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + call void @llvm.loongarch.lasx.xvstelm.h(<16 x i16> %va, i8* %p, i32 %b, i32 1) + ret void +} + +define void @lasx_xvstelm_h_idx(<16 x i16> %va, i8* %p, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + call void @llvm.loongarch.lasx.xvstelm.h(<16 x i16> %va, i8* %p, i32 2, i32 %b) + ret void +} + +declare void @llvm.loongarch.lasx.xvstelm.w(<8 x i32>, i8*, i32, i32) + +define void @lasx_xvstelm_w(<8 x i32> %va, i8* %p, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + call void @llvm.loongarch.lasx.xvstelm.w(<8 x i32> %va, i8* %p, i32 %b, i32 1) + ret void +} + +define void @lasx_xvstelm_w_idx(<8 x i32> %va, i8* %p, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + call void @llvm.loongarch.lasx.xvstelm.w(<8 x i32> %va, i8* %p, i32 4, i32 %b) + ret void +} + +declare void @llvm.loongarch.lasx.xvstelm.d(<4 x i64>, i8*, i32, i32) + +define void @lasx_xvstelm_d(<4 x i64> %va, i8* %p, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + call void @llvm.loongarch.lasx.xvstelm.d(<4 x i64> %va, i8* %p, i32 %b, i32 1) + ret void +} + +define void @lasx_xvstelm_d_idx(<4 x i64> %va, i8* %p, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + call void @llvm.loongarch.lasx.xvstelm.d(<4 x i64> %va, i8* %p, i32 8, i32 %b) + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-stelm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-stelm.ll new file mode 100644 index 0000000000000000000000000000000000000000..52ef3c4714127d4c2d25a61e9ca02caa859645b1 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-stelm.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare void @llvm.loongarch.lasx.xvstelm.b(<32 x i8>, i8*, i32, i32) + +define void @lasx_xvstelm_b(<32 x i8> %va, i8* %p) nounwind { +; CHECK-LABEL: lasx_xvstelm_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvstelm.b $xr0, $a0, 1, 1 +; CHECK-NEXT: ret +entry: + call void @llvm.loongarch.lasx.xvstelm.b(<32 x i8> %va, i8* %p, i32 1, i32 1) + ret void +} + +declare void @llvm.loongarch.lasx.xvstelm.h(<16 x i16>, i8*, 
i32, i32) + +define void @lasx_xvstelm_h(<16 x i16> %va, i8* %p) nounwind { +; CHECK-LABEL: lasx_xvstelm_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvstelm.h $xr0, $a0, 2, 1 +; CHECK-NEXT: ret +entry: + call void @llvm.loongarch.lasx.xvstelm.h(<16 x i16> %va, i8* %p, i32 2, i32 1) + ret void +} + +declare void @llvm.loongarch.lasx.xvstelm.w(<8 x i32>, i8*, i32, i32) + +define void @lasx_xvstelm_w(<8 x i32> %va, i8* %p) nounwind { +; CHECK-LABEL: lasx_xvstelm_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvstelm.w $xr0, $a0, 4, 1 +; CHECK-NEXT: ret +entry: + call void @llvm.loongarch.lasx.xvstelm.w(<8 x i32> %va, i8* %p, i32 4, i32 1) + ret void +} + +declare void @llvm.loongarch.lasx.xvstelm.d(<4 x i64>, i8*, i32, i32) + +define void @lasx_xvstelm_d(<4 x i64> %va, i8* %p) nounwind { +; CHECK-LABEL: lasx_xvstelm_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvstelm.d $xr0, $a0, 8, 1 +; CHECK-NEXT: ret +entry: + call void @llvm.loongarch.lasx.xvstelm.d(<4 x i64> %va, i8* %p, i32 8, i32 1) + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sub.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sub.ll new file mode 100644 index 0000000000000000000000000000000000000000..4d69dd83dcde7fdfc7e11cedce17d059f238fecc --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-sub.ll @@ -0,0 +1,62 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvsub.b(<32 x i8>, <32 x i8>) + +define <32 x i8> @lasx_xvsub_b(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvsub_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsub.b $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvsub.b(<32 x i8> %va, <32 x i8> %vb) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvsub.h(<16 x i16>, <16 x i16>) + +define <16 x i16> @lasx_xvsub_h(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvsub_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsub.h $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvsub.h(<16 x i16> %va, <16 x i16> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvsub.w(<8 x i32>, <8 x i32>) + +define <8 x i32> @lasx_xvsub_w(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvsub_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsub.w $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvsub.w(<8 x i32> %va, <8 x i32> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvsub.d(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvsub_d(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvsub_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsub.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvsub.d(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvsub.q(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvsub_q(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvsub_q: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsub.q $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvsub.q(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-subi-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-subi-invalid-imm.ll new file mode 100644 index 
0000000000000000000000000000000000000000..810008c17f7e5b424e59f2eeb5c15ac2b9f36099 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-subi-invalid-imm.ll @@ -0,0 +1,65 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvsubi.bu(<32 x i8>, i32) + +define <32 x i8> @lasx_xvsubi_bu_lo(<32 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvsubi.bu: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvsubi.bu(<32 x i8> %va, i32 -1) + ret <32 x i8> %res +} + +define <32 x i8> @lasx_xvsubi_bu_hi(<32 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvsubi.bu: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvsubi.bu(<32 x i8> %va, i32 32) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvsubi.hu(<16 x i16>, i32) + +define <16 x i16> @lasx_xvsubi_hu_lo(<16 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvsubi.hu: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvsubi.hu(<16 x i16> %va, i32 -1) + ret <16 x i16> %res +} + +define <16 x i16> @lasx_xvsubi_hu_hi(<16 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvsubi.hu: argument out of range +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvsubi.hu(<16 x i16> %va, i32 32) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvsubi.wu(<8 x i32>, i32) + +define <8 x i32> @lasx_xvsubi_wu_lo(<8 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvsubi.wu: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvsubi.wu(<8 x i32> %va, i32 -1) + ret <8 x i32> %res +} + +define <8 x i32> @lasx_xvsubi_wu_hi(<8 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvsubi.wu: argument out of range +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvsubi.wu(<8 x i32> %va, i32 32) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvsubi.du(<4 x i64>, i32) + +define <4 x i64> @lasx_xvsubi_du_lo(<4 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvsubi.du: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvsubi.du(<4 x i64> %va, i32 -1) + ret <4 x i64> %res +} + +define <4 x i64> @lasx_xvsubi_du_hi(<4 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvsubi.du: argument out of range +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvsubi.du(<4 x i64> %va, i32 32) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-subi-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-subi-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..924b89ce9d6c423b7b84781157efd9122933800b --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-subi-non-imm.ll @@ -0,0 +1,37 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvsubi.bu(<32 x i8>, i32) + +define <32 x i8> @lasx_xvsubi_bu(<32 x i8> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvsubi.bu(<32 x i8> %va, i32 %b) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvsubi.hu(<16 x i16>, i32) + +define <16 x i16> @lasx_xvsubi_hu(<16 x i16> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvsubi.hu(<16 x i16> %va, i32 %b) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvsubi.wu(<8 x i32>, i32) + +define <8 
x i32> @lasx_xvsubi_wu(<8 x i32> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvsubi.wu(<8 x i32> %va, i32 %b) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvsubi.du(<4 x i64>, i32) + +define <4 x i64> @lasx_xvsubi_du(<4 x i64> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvsubi.du(<4 x i64> %va, i32 %b) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-subi.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-subi.ll new file mode 100644 index 0000000000000000000000000000000000000000..cc3235ff4657d4e484eff3ef85763cbeaf933815 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-subi.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvsubi.bu(<32 x i8>, i32) + +define <32 x i8> @lasx_xvsubi_bu(<32 x i8> %va) nounwind { +; CHECK-LABEL: lasx_xvsubi_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsubi.bu $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvsubi.bu(<32 x i8> %va, i32 1) + ret <32 x i8> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvsubi.hu(<16 x i16>, i32) + +define <16 x i16> @lasx_xvsubi_hu(<16 x i16> %va) nounwind { +; CHECK-LABEL: lasx_xvsubi_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsubi.hu $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvsubi.hu(<16 x i16> %va, i32 1) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvsubi.wu(<8 x i32>, i32) + +define <8 x i32> @lasx_xvsubi_wu(<8 x i32> %va) nounwind { +; CHECK-LABEL: lasx_xvsubi_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsubi.wu $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvsubi.wu(<8 x i32> %va, i32 1) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvsubi.du(<4 x i64>, i32) + +define <4 x i64> @lasx_xvsubi_du(<4 x i64> %va) nounwind { +; CHECK-LABEL: lasx_xvsubi_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsubi.du $xr0, $xr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvsubi.du(<4 x i64> %va, i32 1) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-subw.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-subw.ll new file mode 100644 index 0000000000000000000000000000000000000000..6f203e8949900beaa3d91cb8991609db99c4ac45 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-subw.ll @@ -0,0 +1,194 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.b(<32 x i8>, <32 x i8>) + +define <16 x i16> @lasx_xvsubwev_h_b(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvsubwev_h_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsubwev.h.b $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.b(<32 x i8> %va, <32 x i8> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.h(<16 x i16>, <16 x i16>) + +define <8 x i32> @lasx_xvsubwev_w_h(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvsubwev_w_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsubwev.w.h $xr0, $xr0, 
$xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.h(<16 x i16> %va, <16 x i16> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.w(<8 x i32>, <8 x i32>) + +define <4 x i64> @lasx_xvsubwev_d_w(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvsubwev_d_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsubwev.d.w $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.w(<8 x i32> %va, <8 x i32> %vb) + ret <4 x i64> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.d(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvsubwev_q_d(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvsubwev_q_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsubwev.q.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.d(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.bu(<32 x i8>, <32 x i8>) + +define <16 x i16> @lasx_xvsubwev_h_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvsubwev_h_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsubwev.h.bu $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.bu(<32 x i8> %va, <32 x i8> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.hu(<16 x i16>, <16 x i16>) + +define <8 x i32> @lasx_xvsubwev_w_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvsubwev_w_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsubwev.w.hu $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.hu(<16 x i16> %va, <16 x i16> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.wu(<8 x i32>, <8 x i32>) + +define <4 x i64> @lasx_xvsubwev_d_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvsubwev_d_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsubwev.d.wu $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.wu(<8 x i32> %va, <8 x i32> %vb) + ret <4 x i64> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.du(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvsubwev_q_du(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvsubwev_q_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsubwev.q.du $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.du(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.b(<32 x i8>, <32 x i8>) + +define <16 x i16> @lasx_xvsubwod_h_b(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvsubwod_h_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsubwod.h.b $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.b(<32 x i8> %va, <32 x i8> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.h(<16 x i16>, <16 x i16>) + +define <8 x i32> @lasx_xvsubwod_w_h(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvsubwod_w_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsubwod.w.h $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.h(<16 x i16> %va, <16 x i16> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.w(<8 x i32>, <8 x i32>) + +define <4 x i64> @lasx_xvsubwod_d_w(<8 x i32> %va, <8 x 
i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvsubwod_d_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsubwod.d.w $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.w(<8 x i32> %va, <8 x i32> %vb) + ret <4 x i64> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.d(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvsubwod_q_d(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvsubwod_q_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsubwod.q.d $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.d(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} + +declare <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.bu(<32 x i8>, <32 x i8>) + +define <16 x i16> @lasx_xvsubwod_h_bu(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvsubwod_h_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsubwod.h.bu $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.bu(<32 x i8> %va, <32 x i8> %vb) + ret <16 x i16> %res +} + +declare <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.hu(<16 x i16>, <16 x i16>) + +define <8 x i32> @lasx_xvsubwod_w_hu(<16 x i16> %va, <16 x i16> %vb) nounwind { +; CHECK-LABEL: lasx_xvsubwod_w_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsubwod.w.hu $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.hu(<16 x i16> %va, <16 x i16> %vb) + ret <8 x i32> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.wu(<8 x i32>, <8 x i32>) + +define <4 x i64> @lasx_xvsubwod_d_wu(<8 x i32> %va, <8 x i32> %vb) nounwind { +; CHECK-LABEL: lasx_xvsubwod_d_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsubwod.d.wu $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.wu(<8 x i32> %va, <8 x i32> %vb) + ret <4 x i64> %res +} + +declare <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.du(<4 x i64>, <4 x i64>) + +define <4 x i64> @lasx_xvsubwod_q_du(<4 x i64> %va, <4 x i64> %vb) nounwind { +; CHECK-LABEL: lasx_xvsubwod_q_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvsubwod.q.du $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.du(<4 x i64> %va, <4 x i64> %vb) + ret <4 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-xor.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-xor.ll new file mode 100644 index 0000000000000000000000000000000000000000..6395b3d6f2e7a8c0aafb2eed863314789126b9c3 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-xor.ll @@ -0,0 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvxor.v(<32 x i8>, <32 x i8>) + +define <32 x i8> @lasx_xvxor_v(<32 x i8> %va, <32 x i8> %vb) nounwind { +; CHECK-LABEL: lasx_xvxor_v: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvxor.v(<32 x i8> %va, <32 x i8> %vb) + ret <32 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-xori-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-xori-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..0170d204cf425bbe61cd8a6f8197bb0afdc814a4 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-xori-invalid-imm.ll @@ -0,0 +1,17 @@ +; RUN: not llc --mtriple=loongarch64 
--mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvxori.b(<32 x i8>, i32) + +define <32 x i8> @lasx_xvxori_b_lo(<32 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvxori.b: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvxori.b(<32 x i8> %va, i32 -1) + ret <32 x i8> %res +} + +define <32 x i8> @lasx_xvxori_b_hi(<32 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lasx.xvxori.b: argument out of range +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvxori.b(<32 x i8> %va, i32 256) + ret <32 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-xori-non-imm.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-xori-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..1478f691a1cc6e3c8a3e0e65859fc9198f880b65 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-xori-non-imm.ll @@ -0,0 +1,10 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lasx < %s 2>&1 | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvxori.b(<32 x i8>, i32) + +define <32 x i8> @lasx_xvxori_b(<32 x i8> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvxori.b(<32 x i8> %va, i32 %b) + ret <32 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-xori.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-xori.ll new file mode 100644 index 0000000000000000000000000000000000000000..c71d7e7311656c988f8baa27e92a6b0f78cbf3f6 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-xori.ll @@ -0,0 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +declare <32 x i8> @llvm.loongarch.lasx.xvxori.b(<32 x i8>, i32) + +define <32 x i8> @lasx_xvxori_b(<32 x i8> %va) nounwind { +; CHECK-LABEL: lasx_xvxori_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvxori.b $xr0, $xr0, 3 +; CHECK-NEXT: ret +entry: + %res = call <32 x i8> @llvm.loongarch.lasx.xvxori.b(<32 x i8> %va, i32 3) + ret <32 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/add.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/add.ll new file mode 100644 index 0000000000000000000000000000000000000000..8e4d0dc6f1c380f9163c6895a2f96781b58e35ff --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/add.ll @@ -0,0 +1,122 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +define void @add_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: add_v32i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvadd.b $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <32 x i8>, ptr %a0 + %v1 = load <32 x i8>, ptr %a1 + %v2 = add <32 x i8> %v0, %v1 + store <32 x i8> %v2, ptr %res + ret void +} + +define void @add_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: add_v16i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvadd.h $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <16 x i16>, ptr %a0 + %v1 = load <16 x i16>, ptr %a1 + %v2 = add <16 x i16> %v0, %v1 + store <16 x i16> %v2, ptr %res + ret void +} + +define void @add_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: add_v8i32: 
+; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvadd.w $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <8 x i32>, ptr %a0 + %v1 = load <8 x i32>, ptr %a1 + %v2 = add <8 x i32> %v0, %v1 + store <8 x i32> %v2, ptr %res + ret void +} + +define void @add_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: add_v4i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvadd.d $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <4 x i64>, ptr %a0 + %v1 = load <4 x i64>, ptr %a1 + %v2 = add <4 x i64> %v0, %v1 + store <4 x i64> %v2, ptr %res + ret void +} + +define void @add_v32i8_31(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: add_v32i8_31: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvaddi.bu $xr0, $xr0, 31 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <32 x i8>, ptr %a0 + %v1 = add <32 x i8> %v0, <i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31> + store <32 x i8> %v1, ptr %res + ret void +} + +define void @add_v16i16_31(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: add_v16i16_31: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvaddi.hu $xr0, $xr0, 31 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <16 x i16>, ptr %a0 + %v1 = add <16 x i16> %v0, <i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31> + store <16 x i16> %v1, ptr %res + ret void +} + +define void @add_v8i32_31(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: add_v8i32_31: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvaddi.wu $xr0, $xr0, 31 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <8 x i32>, ptr %a0 + %v1 = add <8 x i32> %v0, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31> + store <8 x i32> %v1, ptr %res + ret void +} + +define void @add_v4i64_31(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: add_v4i64_31: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvaddi.du $xr0, $xr0, 31 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <4 x i64>, ptr %a0 + %v1 = add <4 x i64> %v0, <i64 31, i64 31, i64 31, i64 31> + store <4 x i64> %v1, ptr %res + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/and.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/and.ll new file mode 100644 index 0000000000000000000000000000000000000000..98c87cadeeb5a0cc4e6efd56a071819e78a455f7 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/and.ll @@ -0,0 +1,125 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +define void @and_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: and_v32i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvand.v $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <32 x i8>, ptr %a0 + %v1 = load <32 x i8>, ptr %a1 + %v2 = and <32 x i8> %v0, %v1 + store <32 x i8> %v2, ptr %res + ret void +} + +define void @and_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: and_v16i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvand.v $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <16 x i16>, ptr %a0 + %v1 = load <16 x i16>, ptr %a1 + %v2 = and <16 x i16> %v0, %v1 + store <16 x i16> %v2, ptr %res + ret void +} + +define 
void @and_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: and_v8i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvand.v $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <8 x i32>, ptr %a0 + %v1 = load <8 x i32>, ptr %a1 + %v2 = and <8 x i32> %v0, %v1 + store <8 x i32> %v2, ptr %res + ret void +} + +define void @and_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: and_v4i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvand.v $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <4 x i64>, ptr %a0 + %v1 = load <4 x i64>, ptr %a1 + %v2 = and <4 x i64> %v0, %v1 + store <4 x i64> %v2, ptr %res + ret void +} + +define void @and_u_v32i8(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: and_u_v32i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvandi.b $xr0, $xr0, 31 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <32 x i8>, ptr %a0 + %v1 = and <32 x i8> %v0, <i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31> + store <32 x i8> %v1, ptr %res + ret void +} + +define void @and_u_v16i16(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: and_u_v16i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvrepli.h $xr1, 31 +; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <16 x i16>, ptr %a0 + %v1 = and <16 x i16> %v0, <i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31> + store <16 x i16> %v1, ptr %res + ret void +} + +define void @and_u_v8i32(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: and_u_v8i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvrepli.w $xr1, 31 +; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <8 x i32>, ptr %a0 + %v1 = and <8 x i32> %v0, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31> + store <8 x i32> %v1, ptr %res + ret void +} + +define void @and_u_v4i64(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: and_u_v4i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvrepli.d $xr1, 31 +; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <4 x i64>, ptr %a0 + %v1 = and <4 x i64> %v0, <i64 31, i64 31, i64 31, i64 31> + store <4 x i64> %v1, ptr %res + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/ashr.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/ashr.ll new file mode 100644 index 0000000000000000000000000000000000000000..fcbf0f1400fe61fcde0ca8908ae89ab654ed19f9 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/ashr.ll @@ -0,0 +1,178 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +define void @ashr_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: ashr_v32i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvsra.b $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <32 x i8>, ptr %a0 + %v1 = load <32 x i8>, ptr %a1 + %v2 = ashr <32 x i8> %v0, %v1 + store <32 x i8> %v2, ptr %res + ret void +} + +define void @ashr_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: ashr_v16i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvsra.h $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 
+; CHECK-NEXT: ret +entry: + %v0 = load <16 x i16>, ptr %a0 + %v1 = load <16 x i16>, ptr %a1 + %v2 = ashr <16 x i16> %v0, %v1 + store <16 x i16> %v2, ptr %res + ret void +} + +define void @ashr_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: ashr_v8i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvsra.w $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <8 x i32>, ptr %a0 + %v1 = load <8 x i32>, ptr %a1 + %v2 = ashr <8 x i32> %v0, %v1 + store <8 x i32> %v2, ptr %res + ret void +} + +define void @ashr_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: ashr_v4i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvsra.d $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <4 x i64>, ptr %a0 + %v1 = load <4 x i64>, ptr %a1 + %v2 = ashr <4 x i64> %v0, %v1 + store <4 x i64> %v2, ptr %res + ret void +} + +define void @ashr_v32i8_1(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: ashr_v32i8_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvsrai.b $xr0, $xr0, 1 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <32 x i8>, ptr %a0 + %v1 = ashr <32 x i8> %v0, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> + store <32 x i8> %v1, ptr %res + ret void +} + +define void @ashr_v32i8_7(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: ashr_v32i8_7: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvsrai.b $xr0, $xr0, 7 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <32 x i8>, ptr %a0 + %v1 = ashr <32 x i8> %v0, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7> + store <32 x i8> %v1, ptr %res + ret void +} + +define void @ashr_v16i16_1(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: ashr_v16i16_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvsrai.h $xr0, $xr0, 1 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <16 x i16>, ptr %a0 + %v1 = ashr <16 x i16> %v0, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> + store <16 x i16> %v1, ptr %res + ret void +} + +define void @ashr_v16i16_15(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: ashr_v16i16_15: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvsrai.h $xr0, $xr0, 15 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <16 x i16>, ptr %a0 + %v1 = ashr <16 x i16> %v0, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15> + store <16 x i16> %v1, ptr %res + ret void +} + +define void @ashr_v8i32_1(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: ashr_v8i32_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvsrai.w $xr0, $xr0, 1 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <8 x i32>, ptr %a0 + %v1 = ashr <8 x i32> %v0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> + store <8 x i32> %v1, ptr %res + ret void +} + +define void @ashr_v8i32_31(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: ashr_v8i32_31: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvsrai.w $xr0, $xr0, 31 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <8 x i32>, ptr %a0 + %v1 = ashr <8 x i32> %v0, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31> + store <8 x i32> %v1, ptr %res + ret void +} + +define void @ashr_v4i64_1(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: ashr_v4i64_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvsrai.d $xr0, $xr0, 1 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <4 x i64>, ptr %a0 + %v1 = ashr <4 x i64> %v0, <i64 1, i64 1, i64 1, i64 1> + store <4 x i64> %v1, ptr %res + ret void +} + +define void 
@ashr_v4i64_63(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: ashr_v4i64_63: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvsrai.d $xr0, $xr0, 63 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <4 x i64>, ptr %a0 + %v1 = ashr <4 x i64> %v0, <i64 63, i64 63, i64 63, i64 63> + store <4 x i64> %v1, ptr %res + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/extractelement.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/extractelement.ll new file mode 100644 index 0000000000000000000000000000000000000000..d53e15faa2be80397bbd9f01fd33dd9da30a30a6 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/extractelement.ll @@ -0,0 +1,238 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +define void @extract_32xi8(ptr %src, ptr %dst) nounwind { +; CHECK-LABEL: extract_32xi8: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a0, 0 +; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 1 +; CHECK-NEXT: st.b $a0, $a1, 0 +; CHECK-NEXT: ret + %v = load volatile <32 x i8>, ptr %src + %e = extractelement <32 x i8> %v, i32 1 + store i8 %e, ptr %dst + ret void +} + +define void @extract_16xi16(ptr %src, ptr %dst) nounwind { +; CHECK-LABEL: extract_16xi16: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a0, 0 +; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 1 +; CHECK-NEXT: st.h $a0, $a1, 0 +; CHECK-NEXT: ret + %v = load volatile <16 x i16>, ptr %src + %e = extractelement <16 x i16> %v, i32 1 + store i16 %e, ptr %dst + ret void +} + +define void @extract_8xi32(ptr %src, ptr %dst) nounwind { +; CHECK-LABEL: extract_8xi32: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a0, 0 +; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 1 +; CHECK-NEXT: st.w $a0, $a1, 0 +; CHECK-NEXT: ret + %v = load volatile <8 x i32>, ptr %src + %e = extractelement <8 x i32> %v, i32 1 + store i32 %e, ptr %dst + ret void +} + +define void @extract_4xi64(ptr %src, ptr %dst) nounwind { +; CHECK-LABEL: extract_4xi64: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a0, 0 +; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 1 +; CHECK-NEXT: st.d $a0, $a1, 0 +; CHECK-NEXT: ret + %v = load volatile <4 x i64>, ptr %src + %e = extractelement <4 x i64> %v, i32 1 + store i64 %e, ptr %dst + ret void +} + +define void @extract_8xfloat(ptr %src, ptr %dst) nounwind { +; CHECK-LABEL: extract_8xfloat: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a0, 0 +; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 7 +; CHECK-NEXT: movgr2fr.w $fa0, $a0 +; CHECK-NEXT: fst.s $fa0, $a1, 0 +; CHECK-NEXT: ret + %v = load volatile <8 x float>, ptr %src + %e = extractelement <8 x float> %v, i32 7 + store float %e, ptr %dst + ret void +} + +define void @extract_4xdouble(ptr %src, ptr %dst) nounwind { +; CHECK-LABEL: extract_4xdouble: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a0, 0 +; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 3 +; CHECK-NEXT: movgr2fr.d $fa0, $a0 +; CHECK-NEXT: fst.d $fa0, $a1, 0 +; CHECK-NEXT: ret + %v = load volatile <4 x double>, ptr %src + %e = extractelement <4 x double> %v, i32 3 + store double %e, ptr %dst + ret void +} + +define void @extract_32xi8_idx(ptr %src, ptr %dst, i32 %idx) nounwind { +; CHECK-LABEL: extract_32xi8_idx: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.d $sp, $sp, -64 +; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill +; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill +; CHECK-NEXT: addi.d $fp, $sp, 64 +; CHECK-NEXT: srli.d $a3, $sp, 5 +; CHECK-NEXT: slli.d $sp, $a3, 5 +; CHECK-NEXT: xvld $xr0, $a0, 0 +; CHECK-NEXT: xvst $xr0, $sp, 0 +; 
CHECK-NEXT: addi.d $a0, $sp, 0 +; CHECK-NEXT: bstrins.d $a0, $a2, 4, 0 +; CHECK-NEXT: ld.b $a0, $a0, 0 +; CHECK-NEXT: st.b $a0, $a1, 0 +; CHECK-NEXT: addi.d $sp, $fp, -64 +; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 64 +; CHECK-NEXT: ret + %v = load volatile <32 x i8>, ptr %src + %e = extractelement <32 x i8> %v, i32 %idx + store i8 %e, ptr %dst + ret void +} + +define void @extract_16xi16_idx(ptr %src, ptr %dst, i32 %idx) nounwind { +; CHECK-LABEL: extract_16xi16_idx: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.d $sp, $sp, -64 +; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill +; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill +; CHECK-NEXT: addi.d $fp, $sp, 64 +; CHECK-NEXT: srli.d $a3, $sp, 5 +; CHECK-NEXT: slli.d $sp, $a3, 5 +; CHECK-NEXT: xvld $xr0, $a0, 0 +; CHECK-NEXT: xvst $xr0, $sp, 0 +; CHECK-NEXT: addi.d $a0, $sp, 0 +; CHECK-NEXT: bstrins.d $a0, $a2, 4, 1 +; CHECK-NEXT: ld.h $a0, $a0, 0 +; CHECK-NEXT: st.h $a0, $a1, 0 +; CHECK-NEXT: addi.d $sp, $fp, -64 +; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 64 +; CHECK-NEXT: ret + %v = load volatile <16 x i16>, ptr %src + %e = extractelement <16 x i16> %v, i32 %idx + store i16 %e, ptr %dst + ret void +} + +define void @extract_8xi32_idx(ptr %src, ptr %dst, i32 %idx) nounwind { +; CHECK-LABEL: extract_8xi32_idx: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.d $sp, $sp, -64 +; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill +; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill +; CHECK-NEXT: addi.d $fp, $sp, 64 +; CHECK-NEXT: srli.d $a3, $sp, 5 +; CHECK-NEXT: slli.d $sp, $a3, 5 +; CHECK-NEXT: xvld $xr0, $a0, 0 +; CHECK-NEXT: xvst $xr0, $sp, 0 +; CHECK-NEXT: addi.d $a0, $sp, 0 +; CHECK-NEXT: bstrins.d $a0, $a2, 4, 2 +; CHECK-NEXT: ld.w $a0, $a0, 0 +; CHECK-NEXT: st.w $a0, $a1, 0 +; CHECK-NEXT: addi.d $sp, $fp, -64 +; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 64 +; CHECK-NEXT: ret + %v = load volatile <8 x i32>, ptr %src + %e = extractelement <8 x i32> %v, i32 %idx + store i32 %e, ptr %dst + ret void +} + +define void @extract_4xi64_idx(ptr %src, ptr %dst, i32 %idx) nounwind { +; CHECK-LABEL: extract_4xi64_idx: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.d $sp, $sp, -64 +; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill +; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill +; CHECK-NEXT: addi.d $fp, $sp, 64 +; CHECK-NEXT: srli.d $a3, $sp, 5 +; CHECK-NEXT: slli.d $sp, $a3, 5 +; CHECK-NEXT: xvld $xr0, $a0, 0 +; CHECK-NEXT: xvst $xr0, $sp, 0 +; CHECK-NEXT: addi.d $a0, $sp, 0 +; CHECK-NEXT: bstrins.d $a0, $a2, 4, 3 +; CHECK-NEXT: ld.d $a0, $a0, 0 +; CHECK-NEXT: st.d $a0, $a1, 0 +; CHECK-NEXT: addi.d $sp, $fp, -64 +; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 64 +; CHECK-NEXT: ret + %v = load volatile <4 x i64>, ptr %src + %e = extractelement <4 x i64> %v, i32 %idx + store i64 %e, ptr %dst + ret void +} + +define void @extract_8xfloat_idx(ptr %src, ptr %dst, i32 %idx) nounwind { +; CHECK-LABEL: extract_8xfloat_idx: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.d $sp, $sp, -64 +; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill +; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill +; CHECK-NEXT: addi.d $fp, $sp, 64 +; CHECK-NEXT: srli.d $a3, $sp, 5 +; CHECK-NEXT: 
slli.d $sp, $a3, 5 +; CHECK-NEXT: xvld $xr0, $a0, 0 +; CHECK-NEXT: xvst $xr0, $sp, 0 +; CHECK-NEXT: addi.d $a0, $sp, 0 +; CHECK-NEXT: bstrins.d $a0, $a2, 4, 2 +; CHECK-NEXT: fld.s $fa0, $a0, 0 +; CHECK-NEXT: fst.s $fa0, $a1, 0 +; CHECK-NEXT: addi.d $sp, $fp, -64 +; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 64 +; CHECK-NEXT: ret + %v = load volatile <8 x float>, ptr %src + %e = extractelement <8 x float> %v, i32 %idx + store float %e, ptr %dst + ret void +} + +define void @extract_4xdouble_idx(ptr %src, ptr %dst, i32 %idx) nounwind { +; CHECK-LABEL: extract_4xdouble_idx: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.d $sp, $sp, -64 +; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill +; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill +; CHECK-NEXT: addi.d $fp, $sp, 64 +; CHECK-NEXT: srli.d $a3, $sp, 5 +; CHECK-NEXT: slli.d $sp, $a3, 5 +; CHECK-NEXT: xvld $xr0, $a0, 0 +; CHECK-NEXT: xvst $xr0, $sp, 0 +; CHECK-NEXT: addi.d $a0, $sp, 0 +; CHECK-NEXT: bstrins.d $a0, $a2, 4, 3 +; CHECK-NEXT: fld.d $fa0, $a0, 0 +; CHECK-NEXT: fst.d $fa0, $a1, 0 +; CHECK-NEXT: addi.d $sp, $fp, -64 +; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 64 +; CHECK-NEXT: ret + %v = load volatile <4 x double>, ptr %src + %e = extractelement <4 x double> %v, i32 %idx + store double %e, ptr %dst + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fadd.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fadd.ll new file mode 100644 index 0000000000000000000000000000000000000000..365bb305fc5aaa30c7ae9444257a13211680ce5a --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fadd.ll @@ -0,0 +1,34 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +define void @fadd_v8f32(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: fadd_v8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvfadd.s $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <8 x float>, ptr %a0 + %v1 = load <8 x float>, ptr %a1 + %v2 = fadd <8 x float> %v0, %v1 + store <8 x float> %v2, ptr %res + ret void +} + +define void @fadd_v4f64(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: fadd_v4f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvfadd.d $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <4 x double>, ptr %a0 + %v1 = load <4 x double>, ptr %a1 + %v2 = fadd <4 x double> %v0, %v1 + store <4 x double> %v2, ptr %res + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fcmp.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fcmp.ll new file mode 100644 index 0000000000000000000000000000000000000000..ef67dbc100c04519e364fb9bf4e8d0b83c8ebb8c --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fcmp.ll @@ -0,0 +1,692 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +;; TRUE +define void @v8f32_fcmp_true(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v8f32_fcmp_true: +; CHECK: # %bb.0: +; CHECK-NEXT: xvrepli.b $xr0, -1 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; 
CHECK-NEXT: ret + %v0 = load <8 x float>, ptr %a0 + %v1 = load <8 x float>, ptr %a1 + %cmp = fcmp true <8 x float> %v0, %v1 + %ext = sext <8 x i1> %cmp to <8 x i32> + store <8 x i32> %ext, ptr %res + ret void +} + +;; FALSE +define void @v4f64_fcmp_false(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4f64_fcmp_false: +; CHECK: # %bb.0: +; CHECK-NEXT: xvrepli.b $xr0, 0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x double>, ptr %a0 + %v1 = load <4 x double>, ptr %a1 + %cmp = fcmp false <4 x double> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i64> + store <4 x i64> %ext, ptr %res + ret void +} + +;; SETOEQ +define void @v8f32_fcmp_oeq(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v8f32_fcmp_oeq: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvfcmp.ceq.s $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x float>, ptr %a0 + %v1 = load <8 x float>, ptr %a1 + %cmp = fcmp oeq <8 x float> %v0, %v1 + %ext = sext <8 x i1> %cmp to <8 x i32> + store <8 x i32> %ext, ptr %res + ret void +} + +define void @v4f64_fcmp_oeq(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4f64_fcmp_oeq: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvfcmp.ceq.d $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x double>, ptr %a0 + %v1 = load <4 x double>, ptr %a1 + %cmp = fcmp oeq <4 x double> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i64> + store <4 x i64> %ext, ptr %res + ret void +} + +;; SETUEQ +define void @v8f32_fcmp_ueq(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v8f32_fcmp_ueq: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvfcmp.cueq.s $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x float>, ptr %a0 + %v1 = load <8 x float>, ptr %a1 + %cmp = fcmp ueq <8 x float> %v0, %v1 + %ext = sext <8 x i1> %cmp to <8 x i32> + store <8 x i32> %ext, ptr %res + ret void +} + +define void @v4f64_fcmp_ueq(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4f64_fcmp_ueq: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvfcmp.cueq.d $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x double>, ptr %a0 + %v1 = load <4 x double>, ptr %a1 + %cmp = fcmp ueq <4 x double> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i64> + store <4 x i64> %ext, ptr %res + ret void +} + +;; SETEQ +define void @v8f32_fcmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v8f32_fcmp_eq: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvfcmp.ceq.s $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x float>, ptr %a0 + %v1 = load <8 x float>, ptr %a1 + %cmp = fcmp fast oeq <8 x float> %v0, %v1 + %ext = sext <8 x i1> %cmp to <8 x i32> + store <8 x i32> %ext, ptr %res + ret void +} + +define void @v4f64_fcmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4f64_fcmp_eq: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvfcmp.ceq.d $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x double>, ptr %a0 + %v1 = load <4 x double>, ptr %a1 + %cmp = fcmp fast ueq <4 x double> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i64> + store <4 x i64> %ext, ptr %res + ret void +} 
+ +;; SETOLE +define void @v8f32_fcmp_ole(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v8f32_fcmp_ole: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvfcmp.cle.s $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x float>, ptr %a0 + %v1 = load <8 x float>, ptr %a1 + %cmp = fcmp ole <8 x float> %v0, %v1 + %ext = sext <8 x i1> %cmp to <8 x i32> + store <8 x i32> %ext, ptr %res + ret void +} + +define void @v4f64_fcmp_ole(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4f64_fcmp_ole: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvfcmp.cle.d $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x double>, ptr %a0 + %v1 = load <4 x double>, ptr %a1 + %cmp = fcmp ole <4 x double> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i64> + store <4 x i64> %ext, ptr %res + ret void +} + +;; SETULE +define void @v8f32_fcmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v8f32_fcmp_ule: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvfcmp.cule.s $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x float>, ptr %a0 + %v1 = load <8 x float>, ptr %a1 + %cmp = fcmp ule <8 x float> %v0, %v1 + %ext = sext <8 x i1> %cmp to <8 x i32> + store <8 x i32> %ext, ptr %res + ret void +} + +define void @v4f64_fcmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4f64_fcmp_ule: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvfcmp.cule.d $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x double>, ptr %a0 + %v1 = load <4 x double>, ptr %a1 + %cmp = fcmp ule <4 x double> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i64> + store <4 x i64> %ext, ptr %res + ret void +} + +;; SETLE +define void @v8f32_fcmp_le(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v8f32_fcmp_le: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvfcmp.cle.s $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x float>, ptr %a0 + %v1 = load <8 x float>, ptr %a1 + %cmp = fcmp fast ole <8 x float> %v0, %v1 + %ext = sext <8 x i1> %cmp to <8 x i32> + store <8 x i32> %ext, ptr %res + ret void +} + +define void @v4f64_fcmp_le(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4f64_fcmp_le: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvfcmp.cle.d $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x double>, ptr %a0 + %v1 = load <4 x double>, ptr %a1 + %cmp = fcmp fast ule <4 x double> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i64> + store <4 x i64> %ext, ptr %res + ret void +} + +;; SETOLT +define void @v8f32_fcmp_olt(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v8f32_fcmp_olt: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvfcmp.clt.s $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x float>, ptr %a0 + %v1 = load <8 x float>, ptr %a1 + %cmp = fcmp olt <8 x float> %v0, %v1 + %ext = sext <8 x i1> %cmp to <8 x i32> + store <8 x i32> %ext, ptr %res + ret void +} + +define void @v4f64_fcmp_olt(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4f64_fcmp_olt: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld 
$xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvfcmp.clt.d $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x double>, ptr %a0 + %v1 = load <4 x double>, ptr %a1 + %cmp = fcmp olt <4 x double> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i64> + store <4 x i64> %ext, ptr %res + ret void +} + +;; SETULT +define void @v8f32_fcmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v8f32_fcmp_ult: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvfcmp.cult.s $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x float>, ptr %a0 + %v1 = load <8 x float>, ptr %a1 + %cmp = fcmp ult <8 x float> %v0, %v1 + %ext = sext <8 x i1> %cmp to <8 x i32> + store <8 x i32> %ext, ptr %res + ret void +} + +define void @v4f64_fcmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4f64_fcmp_ult: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvfcmp.cult.d $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x double>, ptr %a0 + %v1 = load <4 x double>, ptr %a1 + %cmp = fcmp ult <4 x double> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i64> + store <4 x i64> %ext, ptr %res + ret void +} + +;; SETLT +define void @v8f32_fcmp_lt(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v8f32_fcmp_lt: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvfcmp.clt.s $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x float>, ptr %a0 + %v1 = load <8 x float>, ptr %a1 + %cmp = fcmp fast olt <8 x float> %v0, %v1 + %ext = sext <8 x i1> %cmp to <8 x i32> + store <8 x i32> %ext, ptr %res + ret void +} + +define void @v4f64_fcmp_lt(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4f64_fcmp_lt: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvfcmp.clt.d $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x double>, ptr %a0 + %v1 = load <4 x double>, ptr %a1 + %cmp = fcmp fast ult <4 x double> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i64> + store <4 x i64> %ext, ptr %res + ret void +} + +;; SETONE +define void @v8f32_fcmp_one(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v8f32_fcmp_one: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvfcmp.cne.s $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x float>, ptr %a0 + %v1 = load <8 x float>, ptr %a1 + %cmp = fcmp one <8 x float> %v0, %v1 + %ext = sext <8 x i1> %cmp to <8 x i32> + store <8 x i32> %ext, ptr %res + ret void +} + +define void @v4f64_fcmp_one(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4f64_fcmp_one: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvfcmp.cne.d $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x double>, ptr %a0 + %v1 = load <4 x double>, ptr %a1 + %cmp = fcmp one <4 x double> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i64> + store <4 x i64> %ext, ptr %res + ret void +} + +;; SETUNE +define void @v8f32_fcmp_une(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v8f32_fcmp_une: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvfcmp.cune.s $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; 
CHECK-NEXT: ret + %v0 = load <8 x float>, ptr %a0 + %v1 = load <8 x float>, ptr %a1 + %cmp = fcmp une <8 x float> %v0, %v1 + %ext = sext <8 x i1> %cmp to <8 x i32> + store <8 x i32> %ext, ptr %res + ret void +} + +define void @v4f64_fcmp_une(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4f64_fcmp_une: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvfcmp.cune.d $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x double>, ptr %a0 + %v1 = load <4 x double>, ptr %a1 + %cmp = fcmp une <4 x double> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i64> + store <4 x i64> %ext, ptr %res + ret void +} + +;; SETNE +define void @v8f32_fcmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v8f32_fcmp_ne: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvfcmp.cne.s $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x float>, ptr %a0 + %v1 = load <8 x float>, ptr %a1 + %cmp = fcmp fast one <8 x float> %v0, %v1 + %ext = sext <8 x i1> %cmp to <8 x i32> + store <8 x i32> %ext, ptr %res + ret void +} + +define void @v4f64_fcmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4f64_fcmp_ne: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvfcmp.cne.d $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x double>, ptr %a0 + %v1 = load <4 x double>, ptr %a1 + %cmp = fcmp fast une <4 x double> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i64> + store <4 x i64> %ext, ptr %res + ret void +} + +;; SETO +define void @v8f32_fcmp_ord(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v8f32_fcmp_ord: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvfcmp.cor.s $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x float>, ptr %a0 + %v1 = load <8 x float>, ptr %a1 + %cmp = fcmp ord <8 x float> %v0, %v1 + %ext = sext <8 x i1> %cmp to <8 x i32> + store <8 x i32> %ext, ptr %res + ret void +} + +define void @v4f64_fcmp_ord(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4f64_fcmp_ord: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvfcmp.cor.d $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x double>, ptr %a0 + %v1 = load <4 x double>, ptr %a1 + %cmp = fcmp ord <4 x double> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i64> + store <4 x i64> %ext, ptr %res + ret void +} + +;; SETUO +define void @v8f32_fcmp_uno(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v8f32_fcmp_uno: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvfcmp.cun.s $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x float>, ptr %a0 + %v1 = load <8 x float>, ptr %a1 + %cmp = fcmp uno <8 x float> %v0, %v1 + %ext = sext <8 x i1> %cmp to <8 x i32> + store <8 x i32> %ext, ptr %res + ret void +} + +define void @v4f64_fcmp_uno(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4f64_fcmp_uno: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvfcmp.cun.d $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x double>, ptr %a0 + %v1 = load <4 x double>, ptr %a1 + %cmp = fcmp uno <4 x double> %v0, %v1 + %ext = sext <4 x i1> %cmp to 
<4 x i64> + store <4 x i64> %ext, ptr %res + ret void +} + +;; Expand SETOGT +define void @v8f32_fcmp_ogt(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v8f32_fcmp_ogt: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvld $xr1, $a2, 0 +; CHECK-NEXT: xvfcmp.clt.s $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x float>, ptr %a0 + %v1 = load <8 x float>, ptr %a1 + %cmp = fcmp ogt <8 x float> %v0, %v1 + %ext = sext <8 x i1> %cmp to <8 x i32> + store <8 x i32> %ext, ptr %res + ret void +} + +define void @v4f64_fcmp_ogt(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4f64_fcmp_ogt: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvld $xr1, $a2, 0 +; CHECK-NEXT: xvfcmp.clt.d $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x double>, ptr %a0 + %v1 = load <4 x double>, ptr %a1 + %cmp = fcmp ogt <4 x double> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i64> + store <4 x i64> %ext, ptr %res + ret void +} + +;; Expand SETUGT +define void @v8f32_fcmp_ugt(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v8f32_fcmp_ugt: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvld $xr1, $a2, 0 +; CHECK-NEXT: xvfcmp.cult.s $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x float>, ptr %a0 + %v1 = load <8 x float>, ptr %a1 + %cmp = fcmp ugt <8 x float> %v0, %v1 + %ext = sext <8 x i1> %cmp to <8 x i32> + store <8 x i32> %ext, ptr %res + ret void +} + +define void @v4f64_fcmp_ugt(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4f64_fcmp_ugt: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvld $xr1, $a2, 0 +; CHECK-NEXT: xvfcmp.cult.d $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x double>, ptr %a0 + %v1 = load <4 x double>, ptr %a1 + %cmp = fcmp ugt <4 x double> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i64> + store <4 x i64> %ext, ptr %res + ret void +} + +;; Expand SETGT +define void @v8f32_fcmp_gt(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v8f32_fcmp_gt: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvld $xr1, $a2, 0 +; CHECK-NEXT: xvfcmp.clt.s $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x float>, ptr %a0 + %v1 = load <8 x float>, ptr %a1 + %cmp = fcmp fast ogt <8 x float> %v0, %v1 + %ext = sext <8 x i1> %cmp to <8 x i32> + store <8 x i32> %ext, ptr %res + ret void +} + +define void @v4f64_fcmp_gt(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4f64_fcmp_gt: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvld $xr1, $a2, 0 +; CHECK-NEXT: xvfcmp.clt.d $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x double>, ptr %a0 + %v1 = load <4 x double>, ptr %a1 + %cmp = fcmp fast ugt <4 x double> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i64> + store <4 x i64> %ext, ptr %res + ret void +} + +;; Expand SETOGE +define void @v8f32_fcmp_oge(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v8f32_fcmp_oge: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvld $xr1, $a2, 0 +; CHECK-NEXT: xvfcmp.cle.s $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x float>, ptr %a0 + %v1 = load <8 x float>, ptr %a1 + %cmp = fcmp oge <8 x float> %v0, %v1 + %ext = sext <8 x i1> %cmp to <8 x i32> + store <8 x i32> %ext, ptr %res + ret void +} + +define void @v4f64_fcmp_oge(ptr %res, ptr %a0, ptr 
+; CHECK-LABEL: v4f64_fcmp_oge:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvfcmp.cle.d $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <4 x double>, ptr %a0
+ %v1 = load <4 x double>, ptr %a1
+ %cmp = fcmp oge <4 x double> %v0, %v1
+ %ext = sext <4 x i1> %cmp to <4 x i64>
+ store <4 x i64> %ext, ptr %res
+ ret void
+}
+
+;; Expand SETUGE
+define void @v8f32_fcmp_uge(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: v8f32_fcmp_uge:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvfcmp.cule.s $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <8 x float>, ptr %a0
+ %v1 = load <8 x float>, ptr %a1
+ %cmp = fcmp uge <8 x float> %v0, %v1
+ %ext = sext <8 x i1> %cmp to <8 x i32>
+ store <8 x i32> %ext, ptr %res
+ ret void
+}
+
+define void @v4f64_fcmp_uge(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: v4f64_fcmp_uge:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvfcmp.cule.d $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <4 x double>, ptr %a0
+ %v1 = load <4 x double>, ptr %a1
+ %cmp = fcmp uge <4 x double> %v0, %v1
+ %ext = sext <4 x i1> %cmp to <4 x i64>
+ store <4 x i64> %ext, ptr %res
+ ret void
+}
+
+;; Expand SETGE
+define void @v8f32_fcmp_ge(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: v8f32_fcmp_ge:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvfcmp.cle.s $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <8 x float>, ptr %a0
+ %v1 = load <8 x float>, ptr %a1
+ %cmp = fcmp fast oge <8 x float> %v0, %v1
+ %ext = sext <8 x i1> %cmp to <8 x i32>
+ store <8 x i32> %ext, ptr %res
+ ret void
+}
+
+define void @v4f64_fcmp_ge(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: v4f64_fcmp_ge:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvfcmp.cle.d $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <4 x double>, ptr %a0
+ %v1 = load <4 x double>, ptr %a1
+ %cmp = fcmp fast uge <4 x double> %v0, %v1
+ %ext = sext <4 x i1> %cmp to <4 x i64>
+ store <4 x i64> %ext, ptr %res
+ ret void
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fdiv.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fdiv.ll
new file mode 100644
index 0000000000000000000000000000000000000000..6004565b0b784e5db52b6449d1febae0e9e24ea6
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fdiv.ll
@@ -0,0 +1,63 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+define void @fdiv_v8f32(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: fdiv_v8f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a2, 0
+; CHECK-NEXT: xvld $xr1, $a1, 0
+; CHECK-NEXT: xvfdiv.s $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <8 x float>, ptr %a0
+ %v1 = load <8 x float>, ptr %a1
+ %v2 = fdiv <8 x float> %v0, %v1
+ store <8 x float> %v2, ptr %res
+ ret void
+}
+
+define void @fdiv_v4f64(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: fdiv_v4f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a2, 0
+; CHECK-NEXT: xvld $xr1, $a1, 0
+; CHECK-NEXT: xvfdiv.d $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <4 x double>, ptr %a0
+ %v1 = load <4 x double>, ptr %a1
+ %v2 = fdiv <4 x double> %v0, %v1
+ store <4 x double> %v2, ptr %res
+ ret void
+}
+
+;; 1.0 / vec
+define void @one_fdiv_v8f32(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: one_fdiv_v8f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvfrecip.s $xr0, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <8 x float>, ptr %a0
+ %div = fdiv <8 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %v0
+ store <8 x float> %div, ptr %res
+ ret void
+}
+
+define void @one_fdiv_v4f64(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: one_fdiv_v4f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvfrecip.d $xr0, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <4 x double>, ptr %a0
+ %div = fdiv <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, %v0
+ store <4 x double> %div, ptr %res
+ ret void
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fmul.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fmul.ll
new file mode 100644
index 0000000000000000000000000000000000000000..a48dca8d284704b28ae1a9e125b76fc07bcc6074
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fmul.ll
@@ -0,0 +1,34 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+define void @fmul_v8f32(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: fmul_v8f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a2, 0
+; CHECK-NEXT: xvld $xr1, $a1, 0
+; CHECK-NEXT: xvfmul.s $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <8 x float>, ptr %a0
+ %v1 = load <8 x float>, ptr %a1
+ %v2 = fmul <8 x float> %v0, %v1
+ store <8 x float> %v2, ptr %res
+ ret void
+}
+
+define void @fmul_v4f64(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: fmul_v4f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a2, 0
+; CHECK-NEXT: xvld $xr1, $a1, 0
+; CHECK-NEXT: xvfmul.d $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <4 x double>, ptr %a0
+ %v1 = load <4 x double>, ptr %a1
+ %v2 = fmul <4 x double> %v0, %v1
+ store <4 x double> %v2, ptr %res
+ ret void
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fneg.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fneg.ll
new file mode 100644
index 0000000000000000000000000000000000000000..5eb468fc55a0ec6685ad9524d87615a1b6305bb4
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fneg.ll
@@ -0,0 +1,29 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+define void @fneg_v8f32(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: fneg_v8f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvbitrevi.w $xr0, $xr0, 31
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <8 x float>, ptr %a0
+ %v1 = fneg <8 x float> %v0
+ store <8 x float> %v1, ptr %res
+ ret void
+}
+define void @fneg_v4f64(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: fneg_v4f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvbitrevi.d $xr0, $xr0, 63
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <4 x double>, ptr %a0
+ %v1 = fneg <4 x double> %v0
+ store <4 x double> %v1, ptr %res
+ ret void
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fptosi.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fptosi.ll
new file mode 100644
index 0000000000000000000000000000000000000000..0d9f57b57ffae3cf6046c20ffa7d804f44975dca
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fptosi.ll
@@ -0,0 +1,57 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+define void @fptosi_v8f32_v8i32(ptr %res, ptr %in){
+; CHECK-LABEL: fptosi_v8f32_v8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvftintrz.w.s $xr0, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <8 x float>, ptr %in
+ %v1 = fptosi <8 x float> %v0 to <8 x i32>
+ store <8 x i32> %v1, ptr %res
+ ret void
+}
+
+define void @fptosi_v4f64_v4i64(ptr %res, ptr %in){
+; CHECK-LABEL: fptosi_v4f64_v4i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvftintrz.l.d $xr0, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <4 x double>, ptr %in
+ %v1 = fptosi <4 x double> %v0 to <4 x i64>
+ store <4 x i64> %v1, ptr %res
+ ret void
+}
+
+define void @fptosi_v4f64_v4i32(ptr %res, ptr %in){
+; CHECK-LABEL: fptosi_v4f64_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvpermi.d $xr1, $xr0, 238
+; CHECK-NEXT: xvfcvt.s.d $xr0, $xr1, $xr0
+; CHECK-NEXT: xvftintrz.w.s $xr0, $xr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <4 x double>, ptr %in
+ %v1 = fptosi <4 x double> %v0 to <4 x i32>
+ store <4 x i32> %v1, ptr %res
+ ret void
+}
+
+define void @fptosi_v4f32_v4i64(ptr %res, ptr %in){
+; CHECK-LABEL: fptosi_v4f32_v4i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vftintrz.w.s $vr0, $vr0
+; CHECK-NEXT: vext2xv.d.w $xr0, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <4 x float>, ptr %in
+ %v1 = fptosi <4 x float> %v0 to <4 x i64>
+ store <4 x i64> %v1, ptr %res
+ ret void
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fptoui.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fptoui.ll
new file mode 100644
index 0000000000000000000000000000000000000000..27d70f33cd34e61fe332a52e31390049ec05beb9
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fptoui.ll
@@ -0,0 +1,57 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+define void @fptoui_v8f32_v8i32(ptr %res, ptr %in){
+; CHECK-LABEL: fptoui_v8f32_v8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvftintrz.wu.s $xr0, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <8 x float>, ptr %in
+ %v1 = fptoui <8 x float> %v0 to <8 x i32>
+ store <8 x i32> %v1, ptr %res
+ ret void
+}
+
+define void @fptoui_v4f64_v4i64(ptr %res, ptr %in){
+; CHECK-LABEL: fptoui_v4f64_v4i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvftintrz.lu.d $xr0, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <4 x double>, ptr %in
+ %v1 = fptoui <4 x double> %v0 to <4 x i64>
+ store <4 x i64> %v1, ptr %res
+ ret void
+}
+
+define void @fptoui_v4f64_v4i32(ptr %res, ptr %in){
+; CHECK-LABEL: fptoui_v4f64_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvpermi.d $xr1, $xr0, 238
+; CHECK-NEXT: xvfcvt.s.d $xr0, $xr1, $xr0
+; CHECK-NEXT: xvftintrz.w.s $xr0, $xr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <4 x double>, ptr %in
+ %v1 = fptoui <4 x double> %v0 to <4 x i32>
+ store <4 x i32> %v1, ptr %res
+ ret void
+}
+
+define void @fptoui_v4f32_v4i64(ptr %res, ptr %in){
+; CHECK-LABEL: fptoui_v4f32_v4i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vftintrz.wu.s $vr0, $vr0
+; CHECK-NEXT: vext2xv.du.wu $xr0, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <4 x float>, ptr %in
+ %v1 = fptoui <4 x float> %v0 to <4 x i64>
+ store <4 x i64> %v1, ptr %res
+ ret void
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fsub.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fsub.ll
new file mode 100644
index 0000000000000000000000000000000000000000..6164aa5a55c7e40e1b2c635bb2bc2ccb49854dc1
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fsub.ll
@@ -0,0 +1,34 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+define void @fsub_v8f32(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: fsub_v8f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a2, 0
+; CHECK-NEXT: xvld $xr1, $a1, 0
+; CHECK-NEXT: xvfsub.s $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <8 x float>, ptr %a0
+ %v1 = load <8 x float>, ptr %a1
+ %v2 = fsub <8 x float> %v0, %v1
+ store <8 x float> %v2, ptr %res
+ ret void
+}
+
+define void @fsub_v4f64(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: fsub_v4f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a2, 0
+; CHECK-NEXT: xvld $xr1, $a1, 0
+; CHECK-NEXT: xvfsub.d $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <4 x double>, ptr %a0
+ %v1 = load <4 x double>, ptr %a1
+ %v2 = fsub <4 x double> %v0, %v1
+ store <4 x double> %v2, ptr %res
+ ret void
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/icmp.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/icmp.ll
new file mode 100644
index 0000000000000000000000000000000000000000..6693fe0f6ec7cec0b0a7bedea15611e4e644081b
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/icmp.ll
@@ -0,0 +1,939 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+;; SETEQ
+define void @v32i8_icmp_eq_imm(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: v32i8_icmp_eq_imm:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvseqi.b $xr0, $xr0, 15
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <32 x i8>, ptr %a0
+ %cmp = icmp eq <32 x i8> %v0, <i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15>
+ %ext = sext <32 x i1> %cmp to <32 x i8>
+ store <32 x i8> %ext, ptr %res
+ ret void
+}
+
+define void @v32i8_icmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: v32i8_icmp_eq:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a2, 0
+; CHECK-NEXT: xvld $xr1, $a1, 0
+; CHECK-NEXT: xvseq.b $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <32 x i8>, ptr %a0
+ %v1 = load <32 x i8>, ptr %a1
+ %cmp = icmp eq <32 x i8> %v0, %v1
+ %ext = sext <32 x i1> %cmp to <32 x i8>
+ store <32 x i8> %ext, ptr %res
+ ret void
+}
+
+define void @v16i16_icmp_eq_imm(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: v16i16_icmp_eq_imm:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvseqi.h $xr0, $xr0, 15
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <16 x i16>, ptr %a0
+ %cmp = icmp eq <16 x i16> %v0, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+ %ext = sext <16 x i1> %cmp to <16 x i16>
+ store <16 x i16> %ext, ptr %res
+ ret void
+}
+
+define void @v16i16_icmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: v16i16_icmp_eq:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a2, 0
+; CHECK-NEXT: xvld $xr1, $a1, 0
+; CHECK-NEXT: xvseq.h $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <16 x i16>, ptr %a0
+ %v1 = load <16 x i16>, ptr %a1
+ %cmp = icmp eq <16 x i16> %v0, %v1
+ %ext = sext <16 x i1> %cmp to <16 x i16>
+ store <16 x i16> %ext, ptr %res
+ ret void
+}
+
+define void @v8i32_icmp_eq_imm(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: v8i32_icmp_eq_imm:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvseqi.w $xr0, $xr0, 15
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <8 x i32>, ptr %a0
+ %cmp = icmp eq <8 x i32> %v0, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+ %ext = sext <8 x i1> %cmp to <8 x i32>
+ store <8 x i32> %ext, ptr %res
+ ret void
+}
+
+define void @v8i32_icmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: v8i32_icmp_eq:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a2, 0
+; CHECK-NEXT: xvld $xr1, $a1, 0
+; CHECK-NEXT: xvseq.w $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <8 x i32>, ptr %a0
+ %v1 = load <8 x i32>, ptr %a1
+ %cmp = icmp eq <8 x i32> %v0, %v1
+ %ext = sext <8 x i1> %cmp to <8 x i32>
+ store <8 x i32> %ext, ptr %res
+ ret void
+}
+
+define void @v4i64_icmp_eq_imm(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: v4i64_icmp_eq_imm:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvseqi.d $xr0, $xr0, 15
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <4 x i64>, ptr %a0
+ %cmp = icmp eq <4 x i64> %v0, <i64 15, i64 15, i64 15, i64 15>
+ %ext = sext <4 x i1> %cmp to <4 x i64>
+ store <4 x i64> %ext, ptr %res
+ ret void
+}
+
+define void @v4i64_icmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: v4i64_icmp_eq:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a2, 0
+; CHECK-NEXT: xvld $xr1, $a1, 0
+; CHECK-NEXT: xvseq.d $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <4 x i64>, ptr %a0
+ %v1 = load <4 x i64>, ptr %a1
+ %cmp = icmp eq <4 x i64> %v0, %v1
+ %ext = sext <4 x i1> %cmp to <4 x i64>
+ store <4 x i64> %ext, ptr %res
+ ret void
+}
+
+;; SETLE
+define void @v32i8_icmp_sle_imm(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: v32i8_icmp_sle_imm:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvslei.b $xr0, $xr0, 15
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <32 x i8>, ptr %a0
+ %cmp = icmp sle <32 x i8> %v0, <i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15>
+ %ext = sext <32 x i1> %cmp to <32 x i8>
+ store <32 x i8> %ext, ptr %res
+ ret void
+}
+
+define void @v32i8_icmp_sle(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: v32i8_icmp_sle:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a2, 0
+; CHECK-NEXT: xvld $xr1, $a1, 0
+; CHECK-NEXT: xvsle.b $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <32 x i8>, ptr %a0
+ %v1 = load <32 x i8>, ptr %a1
+ %cmp = icmp sle <32 x i8> %v0, %v1
+ %ext = sext <32 x i1> %cmp to <32 x i8>
+ store <32 x i8> %ext, ptr %res
+ ret void
+}
+
+define void @v16i16_icmp_sle_imm(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: v16i16_icmp_sle_imm:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvslei.h $xr0, $xr0, 15
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <16 x i16>, ptr %a0
+ %cmp = icmp sle <16 x i16> %v0, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+ %ext = sext <16 x i1> %cmp to <16 x i16>
+ store <16 x i16> %ext, ptr %res
+ ret void
+}
+
+define void @v16i16_icmp_sle(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: v16i16_icmp_sle:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a2, 0
+; CHECK-NEXT: xvld $xr1, $a1, 0
+; CHECK-NEXT: xvsle.h $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <16 x i16>, ptr %a0
+ %v1 = load <16 x i16>, ptr %a1
+ %cmp = icmp sle <16 x i16> %v0, %v1
+ %ext = sext <16 x i1> %cmp to <16 x i16>
+ store <16 x i16> %ext, ptr %res
+ ret void
+}
+
+define void @v8i32_icmp_sle_imm(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: v8i32_icmp_sle_imm:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvslei.w $xr0, $xr0, 15
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <8 x i32>, ptr %a0
+ %cmp = icmp sle <8 x i32> %v0, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+ %ext = sext <8 x i1> %cmp to <8 x i32>
+ store <8 x i32> %ext, ptr %res
+ ret void
+}
+
+define void @v8i32_icmp_sle(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: v8i32_icmp_sle:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a2, 0
+; CHECK-NEXT: xvld $xr1, $a1, 0
+; CHECK-NEXT: xvsle.w $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <8 x i32>, ptr %a0
+ %v1 = load <8 x i32>, ptr %a1
+ %cmp = icmp sle <8 x i32> %v0, %v1
+ %ext = sext <8 x i1> %cmp to <8 x i32>
+ store <8 x i32> %ext, ptr %res
+ ret void
+}
+
+define void @v4i64_icmp_sle_imm(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: v4i64_icmp_sle_imm:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvslei.d $xr0, $xr0, 15
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <4 x i64>, ptr %a0
+ %cmp = icmp sle <4 x i64> %v0, <i64 15, i64 15, i64 15, i64 15>
+ %ext = sext <4 x i1> %cmp to <4 x i64>
+ store <4 x i64> %ext, ptr %res
+ ret void
+}
+
+define void @v4i64_icmp_sle(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: v4i64_icmp_sle:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a2, 0
+; CHECK-NEXT: xvld $xr1, $a1, 0
+; CHECK-NEXT: xvsle.d $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <4 x i64>, ptr %a0
+ %v1 = load <4 x i64>, ptr %a1
+ %cmp = icmp sle <4 x i64> %v0, %v1
+ %ext = sext <4 x i1> %cmp to <4 x i64>
+ store <4 x i64> %ext, ptr %res
+ ret void
+}
+
+;; SETULE
+define void @v32i8_icmp_ule_imm(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: v32i8_icmp_ule_imm:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvslei.bu $xr0, $xr0, 31
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <32 x i8>, ptr %a0
+ %cmp = icmp ule <32 x i8> %v0, <i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31>
+ %ext = sext <32 x i1> %cmp to <32 x i8>
+ store <32 x i8> %ext, ptr %res
+ ret void
+}
+
+define void @v32i8_icmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: v32i8_icmp_ule:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a2, 0
+; CHECK-NEXT: xvld $xr1, $a1, 0
+; CHECK-NEXT: xvsle.bu $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <32 x i8>, ptr %a0
+ %v1 = load <32 x i8>, ptr %a1
+ %cmp = icmp ule <32 x i8> %v0, %v1
+ %ext = sext <32 x i1> %cmp to <32 x i8>
+ store <32 x i8> %ext, ptr %res
+ ret void
+}
+
+define void @v16i16_icmp_ule_imm(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: v16i16_icmp_ule_imm:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvslei.hu $xr0, $xr0, 31
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <16 x i16>, ptr %a0
+ %cmp = icmp ule <16 x i16> %v0, <i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31>
+ %ext = sext <16 x i1> %cmp to <16 x i16>
+ store <16 x i16> %ext, ptr %res
+ ret void
+}
+
+define void @v16i16_icmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: v16i16_icmp_ule:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a2, 0
+; CHECK-NEXT: xvld $xr1, $a1, 0
+; CHECK-NEXT: xvsle.hu $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <16 x i16>, ptr %a0
+ %v1 = load <16 x i16>, ptr %a1
+ %cmp = icmp ule <16 x i16> %v0, %v1
+ %ext = sext <16 x i1> %cmp to <16 x i16>
+ store <16 x i16> %ext, ptr %res
+ ret void
+}
+
+define void @v8i32_icmp_ule_imm(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: v8i32_icmp_ule_imm:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvslei.wu $xr0, $xr0, 31
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <8 x i32>, ptr %a0
+ %cmp = icmp ule <8 x i32> %v0, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
+ %ext = sext <8 x i1> %cmp to <8 x i32>
+ store <8 x i32> %ext, ptr %res
+ ret void
+}
+
+define void @v8i32_icmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: v8i32_icmp_ule:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a2, 0
+; CHECK-NEXT: xvld $xr1, $a1, 0
+; CHECK-NEXT: xvsle.wu $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <8 x i32>, ptr %a0
+ %v1 = load <8 x i32>, ptr %a1
+ %cmp = icmp ule <8 x i32> %v0, %v1
+ %ext = sext <8 x i1> %cmp to <8 x i32>
+ store <8 x i32> %ext, ptr %res
+ ret void
+}
+
+define void @v4i64_icmp_ule_imm(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: v4i64_icmp_ule_imm:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvslei.du $xr0, $xr0, 31
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <4 x i64>, ptr %a0
+ %cmp = icmp ule <4 x i64> %v0, <i64 31, i64 31, i64 31, i64 31>
+ %ext = sext <4 x i1> %cmp to <4 x i64>
+ store <4 x i64> %ext, ptr %res
+ ret void
+}
+
+define void @v4i64_icmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: v4i64_icmp_ule:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a2, 0
+; CHECK-NEXT: xvld $xr1, $a1, 0
+; CHECK-NEXT: xvsle.du $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <4 x i64>, ptr %a0
+ %v1 = load <4 x i64>, ptr %a1
+ %cmp = icmp ule <4 x i64> %v0, %v1
+ %ext = sext <4 x i1> %cmp to <4 x i64>
+ store <4 x i64> %ext, ptr %res
+ ret void
+}
+
+;; SETLT
+define void @v32i8_icmp_slt_imm(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: v32i8_icmp_slt_imm:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvslti.b $xr0, $xr0, 15
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <32 x i8>, ptr %a0
+ %cmp = icmp slt <32 x i8> %v0, <i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15>
+ %ext = sext <32 x i1> %cmp to <32 x i8>
+ store <32 x i8> %ext, ptr %res
+ ret void
+}
+
+define void @v32i8_icmp_slt(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: v32i8_icmp_slt:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a2, 0
+; CHECK-NEXT: xvld $xr1, $a1, 0
+; CHECK-NEXT: xvslt.b $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <32 x i8>, ptr %a0
+ %v1 = load <32 x i8>, ptr %a1
+ %cmp = icmp slt <32 x i8> %v0, %v1
+ %ext = sext <32 x i1> %cmp to <32 x i8>
+ store <32 x i8> %ext, ptr %res
+ ret void
+}
+
+define void @v16i16_icmp_slt_imm(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: v16i16_icmp_slt_imm:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvslti.h $xr0, $xr0, 15
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <16 x i16>, ptr %a0
+ %cmp = icmp slt <16 x i16> %v0, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+ %ext = sext <16 x i1> %cmp to <16 x i16>
+ store <16 x i16> %ext, ptr %res
+ ret void
+}
+
+define void @v16i16_icmp_slt(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: v16i16_icmp_slt:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a2, 0
+; CHECK-NEXT: xvld $xr1, $a1, 0
+; CHECK-NEXT: xvslt.h $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <16 x i16>, ptr %a0
+ %v1 = load <16 x i16>, ptr %a1
+ %cmp = icmp slt <16 x i16> %v0, %v1
+ %ext = sext <16 x i1> %cmp to <16 x i16>
+ store <16 x i16> %ext, ptr %res
+ ret void
+}
+
+define void @v8i32_icmp_slt_imm(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: v8i32_icmp_slt_imm:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvslti.w $xr0, $xr0, 15
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <8 x i32>, ptr %a0
+ %cmp = icmp slt <8 x i32> %v0, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+ %ext = sext <8 x i1> %cmp to <8 x i32>
+ store <8 x i32> %ext, ptr %res
+ ret void
+}
+
+define void @v8i32_icmp_slt(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: v8i32_icmp_slt:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a2, 0
+; CHECK-NEXT: xvld $xr1, $a1, 0
+; CHECK-NEXT: xvslt.w $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <8 x i32>, ptr %a0
+ %v1 = load <8 x i32>, ptr %a1
+ %cmp = icmp slt <8 x i32> %v0, %v1
+ %ext = sext <8 x i1> %cmp to <8 x i32>
+ store <8 x i32> %ext, ptr %res
+ ret void
+}
+
+define void @v4i64_icmp_slt_imm(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: v4i64_icmp_slt_imm:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvslti.d $xr0, $xr0, 15
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <4 x i64>, ptr %a0
+ %cmp = icmp slt <4 x i64> %v0, <i64 15, i64 15, i64 15, i64 15>
+ %ext = sext <4 x i1> %cmp to <4 x i64>
+ store <4 x i64> %ext, ptr %res
+ ret void
+}
+
+define void @v4i64_icmp_slt(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: v4i64_icmp_slt:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a2, 0
+; CHECK-NEXT: xvld $xr1, $a1, 0
+; CHECK-NEXT: xvslt.d $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <4 x i64>, ptr %a0
+ %v1 = load <4 x i64>, ptr %a1
+ %cmp = icmp slt <4 x i64> %v0, %v1
+ %ext = sext <4 x i1> %cmp to <4 x i64>
+ store <4 x i64> %ext, ptr %res
+ ret void
+}
+
+;; SETULT
+define void @v32i8_icmp_ult_imm(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: v32i8_icmp_ult_imm:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvslti.bu $xr0, $xr0, 31
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <32 x i8>, ptr %a0
+ %cmp = icmp ult <32 x i8> %v0, <i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31>
+ %ext = sext <32 x i1> %cmp to <32 x i8>
+ store <32 x i8> %ext, ptr %res
+ ret void
+}
+
+define void @v32i8_icmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: v32i8_icmp_ult:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a2, 0
+; CHECK-NEXT: xvld $xr1, $a1, 0
+; CHECK-NEXT: xvslt.bu $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <32 x i8>, ptr %a0
+ %v1 = load <32 x i8>, ptr %a1
+ %cmp = icmp ult <32 x i8> %v0, %v1
+ %ext = sext <32 x i1> %cmp to <32 x i8>
+ store <32 x i8> %ext, ptr %res
+ ret void
+}
+
+define void @v16i16_icmp_ult_imm(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: v16i16_icmp_ult_imm:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvslti.hu $xr0, $xr0, 31
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <16 x i16>, ptr %a0
+ %cmp = icmp ult <16 x i16> %v0, <i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31>
+ %ext = sext <16 x i1> %cmp to <16 x i16>
+ store <16 x i16> %ext, ptr %res
+ ret void
+}
+
+define void @v16i16_icmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: v16i16_icmp_ult:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a2, 0
+; CHECK-NEXT: xvld $xr1, $a1, 0
+; CHECK-NEXT: xvslt.hu $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <16 x i16>, ptr %a0
+ %v1 = load <16 x i16>, ptr %a1
+ %cmp = icmp ult <16 x i16> %v0, %v1
+ %ext = sext <16 x i1> %cmp to <16 x i16>
+ store <16 x i16> %ext, ptr %res
+ ret void
+}
+
+define void @v8i32_icmp_ult_imm(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: v8i32_icmp_ult_imm:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvslti.wu $xr0, $xr0, 31
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <8 x i32>, ptr %a0
+ %cmp = icmp ult <8 x i32> %v0, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
+ %ext = sext <8 x i1> %cmp to <8 x i32>
+ store <8 x i32> %ext, ptr %res
+ ret void
+}
+
+define void @v8i32_icmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: v8i32_icmp_ult:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a2, 0
+; CHECK-NEXT: xvld $xr1, $a1, 0
+; CHECK-NEXT: xvslt.wu $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <8 x i32>, ptr %a0
+ %v1 = load <8 x i32>, ptr %a1
+ %cmp = icmp ult <8 x i32> %v0, %v1
+ %ext = sext <8 x i1> %cmp to <8 x i32>
+ store <8 x i32> %ext, ptr %res
+ ret void
+}
+
+define void @v4i64_icmp_ult_imm(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: v4i64_icmp_ult_imm:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvslti.du $xr0, $xr0, 31
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <4 x i64>, ptr %a0
+ %cmp = icmp ult <4 x i64> %v0, <i64 31, i64 31, i64 31, i64 31>
+ %ext = sext <4 x i1> %cmp to <4 x i64>
+ store <4 x i64> %ext, ptr %res
+ ret void
+}
+
+define void @v4i64_icmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: v4i64_icmp_ult:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a2, 0
+; CHECK-NEXT: xvld $xr1, $a1, 0
+; CHECK-NEXT: xvslt.du $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <4 x i64>, ptr %a0
+ %v1 = load <4 x i64>, ptr %a1
+ %cmp = icmp ult <4 x i64> %v0, %v1
+ %ext = sext <4 x i1> %cmp to <4 x i64>
+ store <4 x i64> %ext, ptr %res
+ ret void
+}
+
+;; Expand SETNE
+define void @v32i8_icmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: v32i8_icmp_ne:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a2, 0
+; CHECK-NEXT: xvld $xr1, $a1, 0
+; CHECK-NEXT: xvseq.b $xr0, $xr1, $xr0
+; CHECK-NEXT: xvxori.b $xr0, $xr0, 255
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <32 x i8>, ptr %a0
+ %v1 = load <32 x i8>, ptr %a1
+ %cmp = icmp ne <32 x i8> %v0, %v1
+ %ext = sext <32 x i1> %cmp to <32 x i8>
+ store <32 x i8> %ext, ptr %res
+ ret void
+}
+
+define void @v16i16_icmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: v16i16_icmp_ne:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a2, 0
+; CHECK-NEXT: xvld $xr1, $a1, 0
+; CHECK-NEXT: xvseq.h $xr0, $xr1, $xr0
+; CHECK-NEXT: xvrepli.b $xr1, -1
+; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <16 x i16>, ptr %a0
+ %v1 = load <16 x i16>, ptr %a1
+ %cmp = icmp ne <16 x i16> %v0, %v1
+ %ext = sext <16 x i1> %cmp to <16 x i16>
+ store <16 x i16> %ext, ptr %res
+ ret void
+}
+
+define void @v8i32_icmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: v8i32_icmp_ne:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a2, 0
+; CHECK-NEXT: xvld $xr1, $a1, 0
+; CHECK-NEXT: xvseq.w $xr0, $xr1, $xr0
+; CHECK-NEXT: xvrepli.b $xr1, -1
+; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <8 x i32>, ptr %a0
+ %v1 = load <8 x i32>, ptr %a1
+ %cmp = icmp ne <8 x i32> %v0, %v1
+ %ext = sext <8 x i1> %cmp to <8 x i32>
+ store <8 x i32> %ext, ptr %res
+ ret void
+}
+
+define void @v4i64_icmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: v4i64_icmp_ne:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a2, 0
+; CHECK-NEXT: xvld $xr1, $a1, 0
+; CHECK-NEXT: xvseq.d $xr0, $xr1, $xr0
+; CHECK-NEXT: xvrepli.b $xr1, -1
+; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <4 x i64>, ptr %a0
+ %v1 = load <4 x i64>, ptr %a1
+ %cmp = icmp ne <4 x i64> %v0, %v1
+ %ext = sext <4 x i1> %cmp to <4 x i64>
+ store <4 x i64> %ext, ptr %res
+ ret void
+}
+
+;; Expand SETGE
+define void @v32i8_icmp_sge(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: v32i8_icmp_sge:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvsle.b $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <32 x i8>, ptr %a0
+ %v1 = load <32 x i8>, ptr %a1
+ %cmp = icmp sge <32 x i8> %v0, %v1
+ %ext = sext <32 x i1> %cmp to <32 x i8>
+ store <32 x i8> %ext, ptr %res
+ ret void
+}
+
+define void @v16i16_icmp_sge(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: v16i16_icmp_sge:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvsle.h $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <16 x i16>, ptr %a0
+ %v1 = load <16 x i16>, ptr %a1
+ %cmp = icmp sge <16 x i16> %v0, %v1
+ %ext = sext <16 x i1> %cmp to <16 x i16>
+ store <16 x i16> %ext, ptr %res
+ ret void
+}
+
+define void @v8i32_icmp_sge(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: v8i32_icmp_sge:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvsle.w $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <8 x i32>, ptr %a0
+ %v1 = load <8 x i32>, ptr %a1
+ %cmp = icmp sge <8 x i32> %v0, %v1
+ %ext = sext <8 x i1> %cmp to <8 x i32>
+ store <8 x i32> %ext, ptr %res
+ ret void
+}
+
+define void @v4i64_icmp_sge(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: v4i64_icmp_sge:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvsle.d $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <4 x i64>, ptr %a0
+ %v1 = load <4 x i64>, ptr %a1
+ %cmp = icmp sge <4 x i64> %v0, %v1
+ %ext = sext <4 x i1> %cmp to <4 x i64>
+ store <4 x i64> %ext, ptr %res
+ ret void
+}
+
+;; Expand SETUGE
+define void @v32i8_icmp_uge(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: v32i8_icmp_uge:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvsle.bu $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <32 x i8>, ptr %a0
+ %v1 = load <32 x i8>, ptr %a1
+ %cmp = icmp uge <32 x i8> %v0, %v1
+ %ext = sext <32 x i1> %cmp to <32 x i8>
+ store <32 x i8> %ext, ptr %res
+ ret void
+}
+
+define void @v16i16_icmp_uge(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: v16i16_icmp_uge:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvsle.hu $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <16 x i16>, ptr %a0
+ %v1 = load <16 x i16>, ptr %a1
+ %cmp = icmp uge <16 x i16> %v0, %v1
+ %ext = sext <16 x i1> %cmp to <16 x i16>
+ store <16 x i16> %ext, ptr %res
+ ret void
+}
+
+define void @v8i32_icmp_uge(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: v8i32_icmp_uge:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvsle.wu $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <8 x i32>, ptr %a0
+ %v1 = load <8 x i32>, ptr %a1
+ %cmp = icmp uge <8 x i32> %v0, %v1
+ %ext = sext <8 x i1> %cmp to <8 x i32>
+ store <8 x i32> %ext, ptr %res
+ ret void
+}
+
+define void @v4i64_icmp_uge(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: v4i64_icmp_uge:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvsle.du $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <4 x i64>, ptr %a0
+ %v1 = load <4 x i64>, ptr %a1
+ %cmp = icmp uge <4 x i64> %v0, %v1
+ %ext = sext <4 x i1> %cmp to <4 x i64>
+ store <4 x i64> %ext, ptr %res
+ ret void
+}
+
+;; Expand SETGT
+define void @v32i8_icmp_sgt(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: v32i8_icmp_sgt:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvslt.b $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <32 x i8>, ptr %a0
+ %v1 = load <32 x i8>, ptr %a1
+ %cmp = icmp sgt <32 x i8> %v0, %v1
+ %ext = sext <32 x i1> %cmp to <32 x i8>
+ store <32 x i8> %ext, ptr %res
+ ret void
+}
+
+define void @v16i16_icmp_sgt(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: v16i16_icmp_sgt:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvslt.h $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <16 x i16>, ptr %a0
+ %v1 = load <16 x i16>, ptr %a1
+ %cmp = icmp sgt <16 x i16> %v0, %v1
+ %ext = sext <16 x i1> %cmp to <16 x i16>
+ store <16 x i16> %ext, ptr %res
+ ret void
+}
+
+define void @v8i32_icmp_sgt(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: v8i32_icmp_sgt:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvslt.w $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <8 x i32>, ptr %a0
+ %v1 = load <8 x i32>, ptr %a1
+ %cmp = icmp sgt <8 x i32> %v0, %v1
+ %ext = sext <8 x i1> %cmp to <8 x i32>
+ store <8 x i32> %ext, ptr %res
+ ret void
+}
+
+define void @v4i64_icmp_sgt(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: v4i64_icmp_sgt:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvslt.d $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <4 x i64>, ptr %a0
+ %v1 = load <4 x i64>, ptr %a1
+ %cmp = icmp sgt <4 x i64> %v0, %v1
+ %ext = sext <4 x i1> %cmp to <4 x i64>
+ store <4 x i64> %ext, ptr %res
+ ret void
+}
+
+;; Expand SETUGT
+define void @v32i8_icmp_ugt(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: v32i8_icmp_ugt:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvslt.bu $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <32 x i8>, ptr %a0
+ %v1 = load <32 x i8>, ptr %a1
+ %cmp = icmp ugt <32 x i8> %v0, %v1
+ %ext = sext <32 x i1> %cmp to <32 x i8>
+ store <32 x i8> %ext, ptr %res
+ ret void
+}
+
+define void @v16i16_icmp_ugt(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: v16i16_icmp_ugt:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvslt.hu $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <16 x i16>, ptr %a0
+ %v1 = load <16 x i16>, ptr %a1
+ %cmp = icmp ugt <16 x i16> %v0, %v1
+ %ext = sext <16 x i1> %cmp to <16 x i16>
+ store <16 x i16> %ext, ptr %res
+ ret void
+}
+
+define void @v8i32_icmp_ugt(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: v8i32_icmp_ugt:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvslt.wu $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <8 x i32>, ptr %a0
+ %v1 = load <8 x i32>, ptr %a1
+ %cmp = icmp ugt <8 x i32> %v0, %v1
+ %ext = sext <8 x i1> %cmp to <8 x i32>
+ store <8 x i32> %ext, ptr %res
+ ret void
+}
+
+define void @v4i64_icmp_ugt(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: v4i64_icmp_ugt:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvslt.du $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <4 x i64>, ptr %a0
+ %v1 = load <4 x i64>, ptr %a1
+ %cmp = icmp ugt <4 x i64> %v0, %v1
+ %ext = sext <4 x i1> %cmp to <4 x i64>
+ store <4 x i64> %ext, ptr %res
+ ret void
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insertelement.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insertelement.ll
new file mode 100644
index 0000000000000000000000000000000000000000..544c934f256716ef092bd5af3308c62edb1e1e80
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insertelement.ll
@@ -0,0 +1,276 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+define void @insert_32xi8(ptr %src, ptr %dst, i8 %in) nounwind {
+; CHECK-LABEL: insert_32xi8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a0, 0
+; CHECK-NEXT: vinsgr2vr.b $vr0, $a2, 1
+; CHECK-NEXT: xvst $xr0, $a1, 0
+; CHECK-NEXT: ret
+ %v = load volatile <32 x i8>, ptr %src
+ %v_new = insertelement <32 x i8> %v, i8 %in, i32 1
+ store <32 x i8> %v_new, ptr %dst
+ ret void
+}
+
+define void @insert_32xi8_upper(ptr %src, ptr %dst, i8 %in) nounwind {
+; CHECK-LABEL: insert_32xi8_upper:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a0, 0
+; CHECK-NEXT: xvori.b $xr1, $xr0, 0
+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
+; CHECK-NEXT: vinsgr2vr.b $vr1, $a2, 0
+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT: xvst $xr0, $a1, 0
+; CHECK-NEXT: ret
+ %v = load volatile <32 x i8>, ptr %src
+ %v_new = insertelement <32 x i8> %v, i8 %in, i32 16
+ store <32 x i8> %v_new, ptr %dst
+ ret void
+}
+
+define void @insert_16xi16(ptr %src, ptr %dst, i16 %in) nounwind {
+; CHECK-LABEL: insert_16xi16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a0, 0
+; CHECK-NEXT: vinsgr2vr.h $vr0, $a2, 1
+; CHECK-NEXT: xvst $xr0, $a1, 0
+; CHECK-NEXT: ret
+ %v = load volatile <16 x i16>, ptr %src
+ %v_new = insertelement <16 x i16> %v, i16 %in, i32 1
+ store <16 x i16> %v_new, ptr %dst
+ ret void
+}
+
+define void @insert_16xi16_upper(ptr %src, ptr %dst, i16 %in) nounwind {
+; CHECK-LABEL: insert_16xi16_upper:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a0, 0
+; CHECK-NEXT: xvori.b $xr1, $xr0, 0
+; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
+; CHECK-NEXT: vinsgr2vr.h $vr1, $a2, 0
+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT: xvst $xr0, $a1, 0
+; CHECK-NEXT: ret
+ %v = load volatile <16 x i16>, ptr %src
+ %v_new = insertelement <16 x i16> %v, i16 %in, i32 8
+ store <16 x i16> %v_new, ptr %dst
+ ret void
+}
+
+define void @insert_8xi32(ptr %src, ptr %dst, i32 %in) nounwind {
+; CHECK-LABEL: insert_8xi32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a0, 0
+; CHECK-NEXT: xvinsgr2vr.w $xr0, $a2, 1
+; CHECK-NEXT: xvst $xr0, $a1, 0
+; CHECK-NEXT: ret
+ %v = load volatile <8 x i32>, ptr %src
+ %v_new = insertelement <8 x i32> %v, i32 %in, i32 1
+ store <8 x i32> %v_new, ptr %dst
+ ret void
+}
+
+define void @insert_4xi64(ptr %src, ptr %dst, i64 %in) nounwind {
+; CHECK-LABEL: insert_4xi64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a0, 0
+; CHECK-NEXT: xvinsgr2vr.d $xr0, $a2, 1
+; CHECK-NEXT: xvst $xr0, $a1, 0
+; CHECK-NEXT: ret
+ %v = load volatile <4 x i64>, ptr %src
+ %v_new = insertelement <4 x i64> %v, i64 %in, i32 1
+ store <4 x i64> %v_new, ptr %dst
+ ret void
+}
+
+define void @insert_8xfloat(ptr %src, ptr %dst, float %in) nounwind {
+; CHECK-LABEL: insert_8xfloat:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movfr2gr.s $a2, $fa0
+; CHECK-NEXT: xvld $xr0, $a0, 0
+; CHECK-NEXT: xvinsgr2vr.w $xr0, $a2, 1
+; CHECK-NEXT: xvst $xr0, $a1, 0
+; CHECK-NEXT: ret
+ %v = load volatile <8 x float>, ptr %src
+ %v_new = insertelement <8 x float> %v, float %in, i32 1
+ store <8 x float> %v_new, ptr %dst
+ ret void
+}
+
+define void @insert_4xdouble(ptr %src, ptr %dst, double %in) nounwind {
+; CHECK-LABEL: insert_4xdouble:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movfr2gr.d $a2, $fa0
+; CHECK-NEXT: xvld $xr0, $a0, 0
+; CHECK-NEXT: xvinsgr2vr.d $xr0, $a2, 1
+; CHECK-NEXT: xvst $xr0, $a1, 0
+; CHECK-NEXT: ret
+ %v = load volatile <4 x double>, ptr %src
+ %v_new = insertelement <4 x double> %v, double %in, i32 1
+ store <4 x double> %v_new, ptr %dst
+ ret void
+}
+
+define void @insert_32xi8_idx(ptr %src, ptr %dst, i8 %in, i32 %idx) nounwind {
+; CHECK-LABEL: insert_32xi8_idx:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi.d $sp, $sp, -64
+; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT: addi.d $fp, $sp, 64
+; CHECK-NEXT: srli.d $a4, $sp, 5
+; CHECK-NEXT: slli.d $sp, $a4, 5
+; CHECK-NEXT: xvld $xr0, $a0, 0
+; CHECK-NEXT: xvst $xr0, $sp, 0
+; CHECK-NEXT: addi.d $a0, $sp, 0
+; CHECK-NEXT: bstrins.d $a0, $a3, 4, 0
+; CHECK-NEXT: st.b $a2, $a0, 0
+; CHECK-NEXT: xvld $xr0, $sp, 0
+; CHECK-NEXT: xvst $xr0, $a1, 0
+; CHECK-NEXT: addi.d $sp, $fp, -64
+; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 64
+; CHECK-NEXT: ret
+ %v = load volatile <32 x i8>, ptr %src
+ %v_new = insertelement <32 x i8> %v, i8 %in, i32 %idx
+ store <32 x i8> %v_new, ptr %dst
+ ret void
+}
+
+define void @insert_16xi16_idx(ptr %src, ptr %dst, i16 %in, i32 %idx) nounwind {
+; CHECK-LABEL: insert_16xi16_idx:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi.d $sp, $sp, -64
+; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT: addi.d $fp, $sp, 64
+; CHECK-NEXT: srli.d $a4, $sp, 5
+; CHECK-NEXT: slli.d $sp, $a4, 5
+; CHECK-NEXT: xvld $xr0, $a0, 0
+; CHECK-NEXT: xvst $xr0, $sp, 0
+; CHECK-NEXT: addi.d $a0, $sp, 0
+; CHECK-NEXT: bstrins.d $a0, $a3, 4, 1
+; CHECK-NEXT: st.h $a2, $a0, 0
+; CHECK-NEXT: xvld $xr0, $sp, 0
+; CHECK-NEXT: xvst $xr0, $a1, 0
+; CHECK-NEXT: addi.d $sp, $fp, -64
+; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 64
+; CHECK-NEXT: ret
+ %v = load volatile <16 x i16>, ptr %src
+ %v_new = insertelement <16 x i16> %v, i16 %in, i32 %idx
+ store <16 x i16> %v_new, ptr %dst
+ ret void
+}
+
+define void @insert_8xi32_idx(ptr %src, ptr %dst, i32 %in, i32 %idx) nounwind {
+; CHECK-LABEL: insert_8xi32_idx:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi.d $sp, $sp, -64
+; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT: addi.d $fp, $sp, 64
+; CHECK-NEXT: srli.d $a4, $sp, 5
+; CHECK-NEXT: slli.d $sp, $a4, 5
+; CHECK-NEXT: xvld $xr0, $a0, 0
+; CHECK-NEXT: xvst $xr0, $sp, 0
+; CHECK-NEXT: addi.d $a0, $sp, 0
+; CHECK-NEXT: bstrins.d $a0, $a3, 4, 2
+; CHECK-NEXT: st.w $a2, $a0, 0
+; CHECK-NEXT: xvld $xr0, $sp, 0
+; CHECK-NEXT: xvst $xr0, $a1, 0
+; CHECK-NEXT: addi.d $sp, $fp, -64
+; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 64
+; CHECK-NEXT: ret
+ %v = load volatile <8 x i32>, ptr %src
+ %v_new = insertelement <8 x i32> %v, i32 %in, i32 %idx
+ store <8 x i32> %v_new, ptr %dst
+ ret void
+}
+
+define void @insert_4xi64_idx(ptr %src, ptr %dst, i64 %in, i32 %idx) nounwind {
+; CHECK-LABEL: insert_4xi64_idx:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi.d $sp, $sp, -64
+; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT: addi.d $fp, $sp, 64
+; CHECK-NEXT: srli.d $a4, $sp, 5
+; CHECK-NEXT: slli.d $sp, $a4, 5
+; CHECK-NEXT: xvld $xr0, $a0, 0
+; CHECK-NEXT: xvst $xr0, $sp, 0
+; CHECK-NEXT: addi.d $a0, $sp, 0
+; CHECK-NEXT: bstrins.d $a0, $a3, 4, 3
+; CHECK-NEXT: st.d $a2, $a0, 0
+; CHECK-NEXT: xvld $xr0, $sp, 0
+; CHECK-NEXT: xvst $xr0, $a1, 0
+; CHECK-NEXT: addi.d $sp, $fp, -64
+; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 64
+; CHECK-NEXT: ret
+ %v = load volatile <4 x i64>, ptr %src
+ %v_new = insertelement <4 x i64> %v, i64 %in, i32 %idx
+ store <4 x i64> %v_new, ptr %dst
+ ret void
+}
+
+define void @insert_8xfloat_idx(ptr %src, ptr %dst, float %in, i32 %idx) nounwind {
+; CHECK-LABEL: insert_8xfloat_idx:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi.d $sp, $sp, -64
+; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT: addi.d $fp, $sp, 64
+; CHECK-NEXT: srli.d $a3, $sp, 5
+; CHECK-NEXT: slli.d $sp, $a3, 5
+; CHECK-NEXT: xvld $xr1, $a0, 0
+; CHECK-NEXT: xvst $xr1, $sp, 0
+; CHECK-NEXT: addi.d $a0, $sp, 0
+; CHECK-NEXT: bstrins.d $a0, $a2, 4, 2
+; CHECK-NEXT: fst.s $fa0, $a0, 0
+; CHECK-NEXT: xvld $xr0, $sp, 0
+; CHECK-NEXT: xvst $xr0, $a1, 0
+; CHECK-NEXT: addi.d $sp, $fp, -64
+; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 64
+; CHECK-NEXT: ret
+ %v = load volatile <8 x float>, ptr %src
+ %v_new = insertelement <8 x float> %v, float %in, i32 %idx
+ store <8 x float> %v_new, ptr %dst
+ ret void
+}
+
+define void @insert_4xdouble_idx(ptr %src, ptr %dst, double %in, i32 %idx) nounwind {
+; CHECK-LABEL: insert_4xdouble_idx:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi.d $sp, $sp, -64
+; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT: addi.d $fp, $sp, 64
+; CHECK-NEXT: srli.d $a3, $sp, 5
+; CHECK-NEXT: slli.d $sp, $a3, 5
+; CHECK-NEXT: xvld $xr1, $a0, 0
+; CHECK-NEXT: xvst $xr1, $sp, 0
+; CHECK-NEXT: addi.d $a0, $sp, 0
+; CHECK-NEXT: bstrins.d $a0, $a2, 4, 3
+; CHECK-NEXT: fst.d $fa0, $a0, 0
+; CHECK-NEXT: xvld $xr0, $sp, 0
+; CHECK-NEXT: xvst $xr0, $a1, 0
+; CHECK-NEXT: addi.d $sp, $fp, -64
+; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 64
+; CHECK-NEXT: ret
+ %v = load volatile <4 x double>, ptr %src
+ %v_new = insertelement <4 x double> %v, double %in, i32 %idx
+ store <4 x double> %v_new, ptr %dst
+ ret void
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/lshr.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/lshr.ll
new file mode 100644
index 0000000000000000000000000000000000000000..24be69d8032a82ad5667fb89c92ce2349168e105
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/lshr.ll
@@ -0,0 +1,178 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+define void @lshr_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: lshr_v32i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a2, 0
+; CHECK-NEXT: xvld $xr1, $a1, 0
+; CHECK-NEXT: xvsrl.b $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <32 x i8>, ptr %a0
+ %v1 = load <32 x i8>, ptr %a1
+ %v2 = lshr <32 x i8> %v0, %v1
+ store <32 x i8> %v2, ptr %res
+ ret void
+}
+
+define void @lshr_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: lshr_v16i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a2, 0
+; CHECK-NEXT: xvld $xr1, $a1, 0
+; CHECK-NEXT: xvsrl.h $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <16 x i16>, ptr %a0
+ %v1 = load <16 x i16>, ptr %a1
+ %v2 = lshr <16 x i16> %v0, %v1
+ store <16 x i16> %v2, ptr %res
+ ret void
+}
+
+define void @lshr_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: lshr_v8i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a2, 0
+; CHECK-NEXT: xvld $xr1, $a1, 0
+; CHECK-NEXT: xvsrl.w $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <8 x i32>, ptr %a0
+ %v1 = load <8 x i32>, ptr %a1
+ %v2 = lshr <8 x i32> %v0, %v1
+ store <8 x i32> %v2, ptr %res
+ ret void
+}
+
+define void @lshr_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: lshr_v4i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a2, 0
+; CHECK-NEXT: xvld $xr1, $a1, 0
+; CHECK-NEXT: xvsrl.d $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <4 x i64>, ptr %a0
+ %v1 = load <4 x i64>, ptr %a1
+ %v2 = lshr <4 x i64> %v0, %v1
+ store <4 x i64> %v2, ptr %res
+ ret void
+}
+
+define void @lshr_v32i8_1(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: lshr_v32i8_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvsrli.b $xr0, $xr0, 1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <32 x i8>, ptr %a0
+ %v1 = lshr <32 x i8> %v0, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ store <32 x i8> %v1, ptr %res
+ ret void
+}
+
+define void @lshr_v32i8_7(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: lshr_v32i8_7:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvsrli.b $xr0, $xr0, 7
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <32 x i8>, ptr %a0
+ %v1 = lshr <32 x i8> %v0, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+ store <32 x i8> %v1, ptr %res
+ ret void
+}
+
+define void @lshr_v16i16_1(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: lshr_v16i16_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvsrli.h $xr0, $xr0, 1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <16 x i16>, ptr %a0
+ %v1 = lshr <16 x i16> %v0, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ store <16 x i16> %v1, ptr %res
+ ret void
+}
+
+define void @lshr_v16i16_15(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: lshr_v16i16_15:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvsrli.h $xr0, $xr0, 15
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <16 x i16>, ptr %a0
+ %v1 = lshr <16 x i16> %v0, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+ store <16 x i16> %v1, ptr %res
+ ret void
+}
+
+define void @lshr_v8i32_1(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: lshr_v8i32_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvsrli.w $xr0, $xr0, 1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <8 x i32>, ptr %a0
+ %v1 = lshr <8 x i32> %v0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ store <8 x i32> %v1, ptr %res
+ ret void
+}
+
+define void @lshr_v8i32_31(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: lshr_v8i32_31:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvsrli.w $xr0, $xr0, 31
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <8 x i32>, ptr %a0
+ %v1 = lshr <8 x i32> %v0, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
+ store <8 x i32> %v1, ptr %res
+ ret void
+}
+
+define void @lshr_v4i64_1(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: lshr_v4i64_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvsrli.d $xr0, $xr0, 1
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <4 x i64>, ptr %a0
+ %v1 = lshr <4 x i64> %v0, <i64 1, i64 1, i64 1, i64 1>
+ store <4 x i64> %v1, ptr %res
+ ret void
+}
+
+define void @lshr_v4i64_63(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: lshr_v4i64_63:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvsrli.d $xr0, $xr0, 63
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <4 x i64>, ptr %a0
+ %v1 = lshr <4 x i64> %v0, <i64 63, i64 63, i64 63, i64 63>
+ store <4 x i64> %v1, ptr %res
+ ret void
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/mul.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/mul.ll
new file mode 100644
index 0000000000000000000000000000000000000000..dcb893caa2555a27ee709c888b0724ba6c00fcdc
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/mul.ll
@@ -0,0 +1,238 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+define void @mul_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: mul_v32i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a2, 0
+; CHECK-NEXT: xvld $xr1, $a1, 0
+; CHECK-NEXT: xvmul.b $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <32 x i8>, ptr %a0
+ %v1 = load <32 x i8>, ptr %a1
+ %v2 = mul <32 x i8> %v0, %v1
+ store <32 x i8> %v2, ptr %res
+ ret void
+}
+
+define void @mul_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: mul_v16i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a2, 0
+; CHECK-NEXT: xvld $xr1, $a1, 0
+; CHECK-NEXT: xvmul.h $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <16 x i16>, ptr %a0
+ %v1 = load <16 x i16>, ptr %a1
+ %v2 = mul <16 x i16> %v0, %v1
+ store <16 x i16> %v2, ptr %res
+ ret void
+}
+
+define void @mul_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: mul_v8i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a2, 0
+; CHECK-NEXT: xvld $xr1, $a1, 0
+; CHECK-NEXT: xvmul.w $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <8 x i32>, ptr %a0
+ %v1 = load <8 x i32>, ptr %a1
+ %v2 = mul <8 x i32> %v0, %v1
+ store <8 x i32> %v2, ptr %res
+ ret void
+}
+
+define void @mul_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: mul_v4i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a2, 0
+; CHECK-NEXT: xvld $xr1, $a1, 0
+; CHECK-NEXT: xvmul.d $xr0, $xr1, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <4 x i64>, ptr %a0
+ %v1 = load <4 x i64>, ptr %a1
+ %v2 = mul <4 x i64> %v0, %v1
+ store <4 x i64> %v2, ptr %res
+ ret void
+}
+
+define void @mul_square_v32i8(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: mul_square_v32i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvmul.b $xr0, $xr0, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <32 x i8>, ptr %a0
+ %v1 = mul <32 x i8> %v0, %v0
+ store <32 x i8> %v1, ptr %res
+ ret void
+}
+
+define void @mul_square_v16i16(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: mul_square_v16i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvmul.h $xr0, $xr0, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <16 x i16>, ptr %a0
+ %v1 = mul <16 x i16> %v0, %v0
+ store <16 x i16> %v1, ptr %res
+ ret void
+}
+
+define void @mul_square_v8i32(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: mul_square_v8i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvmul.w $xr0, $xr0, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <8 x i32>, ptr %a0
+ %v1 = mul <8 x i32> %v0, %v0
+ store <8 x i32> %v1, ptr %res
+ ret void
+}
+
+define void @mul_square_v4i64(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: mul_square_v4i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvmul.d $xr0, $xr0, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <4 x i64>, ptr %a0
+ %v1 = mul <4 x i64> %v0, %v0
+ store <4 x i64> %v1, ptr %res
+ ret void
+}
+
+define void @mul_v32i8_8(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: mul_v32i8_8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvslli.b $xr0, $xr0, 3
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <32 x i8>, ptr %a0
+ %v1 = mul <32 x i8> %v0, <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
+ store <32 x i8> %v1, ptr %res
+ ret void
+}
+
+define void @mul_v16i16_8(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: mul_v16i16_8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvslli.h $xr0, $xr0, 3
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <16 x i16>, ptr %a0
+ %v1 = mul <16 x i16> %v0, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+ store <16 x i16> %v1, ptr %res
+ ret void
+}
+
+define void @mul_v8i32_8(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: mul_v8i32_8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvslli.w $xr0, $xr0, 3
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <8 x i32>, ptr %a0
+ %v1 = mul <8 x i32> %v0, <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
+ store <8 x i32> %v1, ptr %res
+ ret void
+}
+
+define void @mul_v4i64_8(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: mul_v4i64_8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvslli.d $xr0, $xr0, 3
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <4 x i64>, ptr %a0
+ %v1 = mul <4 x i64> %v0, <i64 8, i64 8, i64 8, i64 8>
+ store <4 x i64> %v1, ptr %res
+ ret void
+}
+
+define void @mul_v32i8_17(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: mul_v32i8_17:
%entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvrepli.b $xr1, 17 +; CHECK-NEXT: xvmul.b $xr0, $xr0, $xr1 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <32 x i8>, ptr %a0 + %v1 = mul <32 x i8> %v0, <i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17> + store <32 x i8> %v1, ptr %res + ret void +} + +define void @mul_v16i16_17(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: mul_v16i16_17: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvrepli.h $xr1, 17 +; CHECK-NEXT: xvmul.h $xr0, $xr0, $xr1 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <16 x i16>, ptr %a0 + %v1 = mul <16 x i16> %v0, <i16 17, i16 17, i16 17, i16 17, i16 17, i16 17, i16 17, i16 17, i16 17, i16 17, i16 17, i16 17, i16 17, i16 17, i16 17, i16 17> + store <16 x i16> %v1, ptr %res + ret void +} + +define void @mul_v8i32_17(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: mul_v8i32_17: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvrepli.w $xr1, 17 +; CHECK-NEXT: xvmul.w $xr0, $xr0, $xr1 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <8 x i32>, ptr %a0 + %v1 = mul <8 x i32> %v0, <i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17> + store <8 x i32> %v1, ptr %res + ret void +} + +define void @mul_v4i64_17(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: mul_v4i64_17: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvrepli.d $xr1, 17 +; CHECK-NEXT: xvmul.d $xr0, $xr0, $xr1 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <4 x i64>, ptr %a0 + %v1 = mul <4 x i64> %v0, <i64 17, i64 17, i64 17, i64 17> + store <4 x i64> %v1, ptr %res + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/or.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/or.ll new file mode 100644 index 0000000000000000000000000000000000000000..f37cbf1cefedc468595801f0bc0527ba859f5202 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/or.ll @@ -0,0 +1,125 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +define void @or_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: or_v32i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvor.v $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <32 x i8>, ptr %a0 + %v1 = load <32 x i8>, ptr %a1 + %v2 = or <32 x i8> %v0, %v1 + store <32 x i8> %v2, ptr %res + ret void +} + +define void @or_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: or_v16i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvor.v $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <16 x i16>, ptr %a0 + %v1 = load <16 x i16>, ptr %a1 + %v2 = or <16 x i16> %v0, %v1 + store <16 x i16> %v2, ptr %res + ret void +} + +define void @or_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: or_v8i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvor.v $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <8 x i32>, ptr %a0 + %v1 = load <8 x i32>, ptr %a1 + %v2 = or <8 x i32> %v0, %v1 + store <8 x i32> %v2, ptr %res + ret void +} + +define void @or_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: or_v4i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvor.v $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <4 x i64>, ptr %a0 + %v1 = load <4 x 
i64>, ptr %a1 + %v2 = or <4 x i64> %v0, %v1 + store <4 x i64> %v2, ptr %res + ret void +} + +define void @or_u_v32i8(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: or_u_v32i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvori.b $xr0, $xr0, 31 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <32 x i8>, ptr %a0 + %v1 = or <32 x i8> %v0, <i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31> + store <32 x i8> %v1, ptr %res + ret void +} + +define void @or_u_v16i16(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: or_u_v16i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvrepli.h $xr1, 31 +; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <16 x i16>, ptr %a0 + %v1 = or <16 x i16> %v0, <i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31> + store <16 x i16> %v1, ptr %res + ret void +} + +define void @or_u_v8i32(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: or_u_v8i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvrepli.w $xr1, 31 +; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <8 x i32>, ptr %a0 + %v1 = or <8 x i32> %v0, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31> + store <8 x i32> %v1, ptr %res + ret void +} + +define void @or_u_v4i64(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: or_u_v4i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvrepli.d $xr1, 31 +; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <4 x i64>, ptr %a0 + %v1 = or <4 x i64> %v0, <i64 31, i64 31, i64 31, i64 31> + store <4 x i64> %v1, ptr %res + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sdiv.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sdiv.ll new file mode 100644 index 0000000000000000000000000000000000000000..e3635a5f14a2bac49bb33ec7a9b7b0a04d7e9cca --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sdiv.ll @@ -0,0 +1,134 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +define void @sdiv_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: sdiv_v32i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvdiv.b $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <32 x i8>, ptr %a0 + %v1 = load <32 x i8>, ptr %a1 + %v2 = sdiv <32 x i8> %v0, %v1 + store <32 x i8> %v2, ptr %res + ret void +} + +define void @sdiv_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: sdiv_v16i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvdiv.h $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <16 x i16>, ptr %a0 + %v1 = load <16 x i16>, ptr %a1 + %v2 = sdiv <16 x i16> %v0, %v1 + store <16 x i16> %v2, ptr %res + ret void +} + +define void @sdiv_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: sdiv_v8i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvdiv.w $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <8 x i32>, ptr %a0 + %v1 = load <8 x i32>, ptr %a1 + %v2 = sdiv <8 x i32> %v0, %v1 + store <8 x i32> %v2, ptr %res + ret void +} + +define void @sdiv_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: sdiv_v4i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a2, 0 +; 
CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvdiv.d $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <4 x i64>, ptr %a0 + %v1 = load <4 x i64>, ptr %a1 + %v2 = sdiv <4 x i64> %v0, %v1 + store <4 x i64> %v2, ptr %res + ret void +} + +define void @sdiv_v32i8_8(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: sdiv_v32i8_8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvsrai.b $xr1, $xr0, 7 +; CHECK-NEXT: xvsrli.b $xr1, $xr1, 5 +; CHECK-NEXT: xvadd.b $xr0, $xr0, $xr1 +; CHECK-NEXT: xvsrai.b $xr0, $xr0, 3 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <32 x i8>, ptr %a0 + %v1 = sdiv <32 x i8> %v0, <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8> + store <32 x i8> %v1, ptr %res + ret void +} + +define void @sdiv_v16i16_8(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: sdiv_v16i16_8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvsrai.h $xr1, $xr0, 15 +; CHECK-NEXT: xvsrli.h $xr1, $xr1, 13 +; CHECK-NEXT: xvadd.h $xr0, $xr0, $xr1 +; CHECK-NEXT: xvsrai.h $xr0, $xr0, 3 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <16 x i16>, ptr %a0 + %v1 = sdiv <16 x i16> %v0, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> + store <16 x i16> %v1, ptr %res + ret void +} + +define void @sdiv_v8i32_8(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: sdiv_v8i32_8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvsrai.w $xr1, $xr0, 31 +; CHECK-NEXT: xvsrli.w $xr1, $xr1, 29 +; CHECK-NEXT: xvadd.w $xr0, $xr0, $xr1 +; CHECK-NEXT: xvsrai.w $xr0, $xr0, 3 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <8 x i32>, ptr %a0 + %v1 = sdiv <8 x i32> %v0, <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8> + store <8 x i32> %v1, ptr %res + ret void +} + +define void @sdiv_v4i64_8(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: sdiv_v4i64_8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvsrai.d $xr1, $xr0, 63 +; CHECK-NEXT: xvsrli.d $xr1, $xr1, 61 +; CHECK-NEXT: xvadd.d $xr0, $xr0, $xr1 +; CHECK-NEXT: xvsrai.d $xr0, $xr0, 3 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <4 x i64>, ptr %a0 + %v1 = sdiv <4 x i64> %v0, <i64 8, i64 8, i64 8, i64 8> + store <4 x i64> %v1, ptr %res + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shl.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shl.ll new file mode 100644 index 0000000000000000000000000000000000000000..8a02c7e3ac975a9a1cddba166a2b6cbb79d9ddd5 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shl.ll @@ -0,0 +1,178 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +define void @shl_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: shl_v32i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvsll.b $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <32 x i8>, ptr %a0 + %v1 = load <32 x i8>, ptr %a1 + %v2 = shl <32 x i8> %v0, %v1 + store <32 x i8> %v2, ptr %res + ret void +} + +define void @shl_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: shl_v16i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvsll.h $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <16 x i16>, ptr %a0 + %v1 = load <16 x i16>, ptr %a1 + %v2 = shl <16 x i16> %v0, %v1 + store <16 x i16> %v2, ptr %res + ret void +} + +define void @shl_v8i32(ptr 
%res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: shl_v8i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvsll.w $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <8 x i32>, ptr %a0 + %v1 = load <8 x i32>, ptr %a1 + %v2 = shl <8 x i32> %v0, %v1 + store <8 x i32> %v2, ptr %res + ret void +} + +define void @shl_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: shl_v4i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvsll.d $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <4 x i64>, ptr %a0 + %v1 = load <4 x i64>, ptr %a1 + %v2 = shl <4 x i64> %v0, %v1 + store <4 x i64> %v2, ptr %res + ret void +} + +define void @shl_v32i8_1(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: shl_v32i8_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvslli.b $xr0, $xr0, 1 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <32 x i8>, ptr %a0 + %v1 = shl <32 x i8> %v0, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> + store <32 x i8> %v1, ptr %res + ret void +} + +define void @shl_v32i8_7(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: shl_v32i8_7: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvslli.b $xr0, $xr0, 7 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <32 x i8>, ptr %a0 + %v1 = shl <32 x i8> %v0, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7> + store <32 x i8> %v1, ptr %res + ret void +} + +define void @shl_v16i16_1(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: shl_v16i16_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvslli.h $xr0, $xr0, 1 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <16 x i16>, ptr %a0 + %v1 = shl <16 x i16> %v0, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> + store <16 x i16> %v1, ptr %res + ret void +} + +define void @shl_v16i16_15(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: shl_v16i16_15: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvslli.h $xr0, $xr0, 15 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <16 x i16>, ptr %a0 + %v1 = shl <16 x i16> %v0, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15> + store <16 x i16> %v1, ptr %res + ret void +} + +define void @shl_v8i32_1(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: shl_v8i32_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvslli.w $xr0, $xr0, 1 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <8 x i32>, ptr %a0 + %v1 = shl <8 x i32> %v0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> + store <8 x i32> %v1, ptr %res + ret void +} + +define void @shl_v8i32_31(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: shl_v8i32_31: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvslli.w $xr0, $xr0, 31 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <8 x i32>, ptr %a0 + %v1 = shl <8 x i32> %v0, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31> + store <8 x i32> %v1, ptr %res + ret void +} + +define void @shl_v4i64_1(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: shl_v4i64_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvslli.d $xr0, $xr0, 1 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <4 x i64>, ptr %a0 + %v1 = shl <4 x i64> %v0, <i64 1, i64 1, i64 1, i64 1> + store <4 x i64> %v1, ptr %res + ret void +} + +define void @shl_v4i64_63(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: shl_v4i64_63: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvslli.d $xr0, $xr0, 63 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = 
load <4 x i64>, ptr %a0 + %v1 = shl <4 x i64> %v0, <i64 63, i64 63, i64 63, i64 63> + store <4 x i64> %v1, ptr %res + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvilv.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvilv.ll new file mode 100644 index 0000000000000000000000000000000000000000..22ab19b9fa44674d836c4e162f9e7f508985e885 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvilv.ll @@ -0,0 +1,74 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc --mtriple=loongarch64 --mattr=+lasx %s -o - | FileCheck %s + +;; xvilvl.b
define <32 x i8> @shufflevector_xvilvl_v32i8(<32 x i8> %a, <32 x i8> %b) { +; CHECK-LABEL: shufflevector_xvilvl_v32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: xvilvl.b $xr0, $xr1, $xr0 +; CHECK-NEXT: ret + %c = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> + ret <32 x i8> %c +} + +;; xvilvl.h +define <16 x i16> @shufflevector_xvilvl_v16i16(<16 x i16> %a, <16 x i16> %b) { +; CHECK-LABEL: shufflevector_xvilvl_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: xvilvl.h $xr0, $xr1, $xr0 +; CHECK-NEXT: ret + %c = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> + ret <16 x i16> %c +} + +;; xvilvl.w +define <8 x i32> @shufflevector_xvilvl_v8i32(<8 x i32> %a, <8 x i32> %b) { +; CHECK-LABEL: shufflevector_xvilvl_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: xvilvl.w $xr0, $xr1, $xr0 +; CHECK-NEXT: ret + %c = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> + ret <8 x i32> %c +} + +;; xvilvh.b +define <32 x i8> @shufflevector_xvilvh_v32i8(<32 x i8> %a, <32 x i8> %b) { +; CHECK-LABEL: shufflevector_xvilvh_v32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: xvilvh.b $xr0, $xr1, $xr0 +; CHECK-NEXT: ret + %c = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> + ret <32 x i8> %c +} + +;; xvilvh.h +define <16 x i16> @shufflevector_xvilvh_v16i16(<16 x i16> %a, <16 x i16> %b) { +; CHECK-LABEL: shufflevector_xvilvh_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: xvilvh.h $xr0, $xr1, $xr0 +; CHECK-NEXT: ret + %c = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> + ret <16 x i16> %c +} + +;; xvilvh.w +define <8 x i32> @shufflevector_xvilvh_v8i32(<8 x i32> %a, <8 x i32> %b) { +; CHECK-LABEL: shufflevector_xvilvh_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: xvilvh.w $xr0, $xr1, $xr0 +; CHECK-NEXT: ret + %c = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> + ret <8 x i32> %c +} + +;; xvilvh.w +define <8 x float> @shufflevector_xvilvh_v8f32(<8 x float> %a, <8 x float> %b) { +; CHECK-LABEL: shufflevector_xvilvh_v8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: xvilvh.w $xr0, $xr1, $xr0 +; CHECK-NEXT: ret + %c = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> + ret <8 x float> %c +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvpack.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvpack.ll new file mode 100644 index 0000000000000000000000000000000000000000..2ff9af4069b9bd873fe72248fdfe5a4a5a1b80da --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvpack.ll @@ -0,0 +1,124 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc --mtriple=loongarch64 --mattr=+lasx %s -o - | FileCheck %s + +;; xvpackev.b +define <32 x i8> @shufflevector_pack_ev_v32i8(<32 x i8> %a, <32 x i8> %b) { +; CHECK-LABEL: shufflevector_pack_ev_v32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: xvpackev.b $xr0, $xr1, $xr0 +; CHECK-NEXT: ret + %c = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> 
+ ret <32 x i8> %c +} + +;; xvpackev.h +define <16 x i16> @shufflevector_pack_ev_v16i16(<16 x i16> %a, <16 x i16> %b) { +; CHECK-LABEL: shufflevector_pack_ev_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: xvpackev.h $xr0, $xr1, $xr0 +; CHECK-NEXT: ret + %c = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> + ret <16 x i16> %c +} + +;; xvpackev.w +define <8 x i32> @shufflevector_pack_ev_v8i32(<8 x i32> %a, <8 x i32> %b) { +; CHECK-LABEL: shufflevector_pack_ev_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: xvpackev.w $xr0, $xr1, $xr0 +; CHECK-NEXT: ret + %c = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> + ret <8 x i32> %c +} + +;; xvpickev.d/xvpackev.d/xvilvl.d +define <4 x i64> @shufflevector_pack_ev_v4i64(<4 x i64> %a, <4 x i64> %b) { +; CHECK-LABEL: shufflevector_pack_ev_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: xvpackev.d $xr0, $xr1, $xr0 +; CHECK-NEXT: ret + %c = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> + ret <4 x i64> %c +} + +;; xvpackev.w +define <8 x float> @shufflevector_pack_ev_v8f32(<8 x float> %a, <8 x float> %b) { +; CHECK-LABEL: shufflevector_pack_ev_v8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: xvpackev.w $xr0, $xr1, $xr0 +; CHECK-NEXT: ret + %c = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> + ret <8 x float> %c +} + +;; xvpickev.d/xvpackev.d/xvilvl.d +define <4 x double> @shufflevector_pack_ev_v4f64(<4 x double> %a, <4 x double> %b) { +; CHECK-LABEL: shufflevector_pack_ev_v4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: xvpackev.d $xr0, $xr1, $xr0 +; CHECK-NEXT: ret + %c = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> + ret <4 x double> %c +} + +;; xvpackod.b +define <32 x i8> @shufflevector_pack_od_v32i8(<32 x i8> %a, <32 x i8> %b) { +; CHECK-LABEL: shufflevector_pack_od_v32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: xvpackod.b $xr0, $xr1, $xr0 +; CHECK-NEXT: ret + %c = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> + ret <32 x i8> %c +} + +;; xvpackod.h +define <16 x i16> @shufflevector_pack_od_v16i16(<16 x i16> %a, <16 x i16> %b) { +; CHECK-LABEL: shufflevector_pack_od_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: xvpackod.h $xr0, $xr1, $xr0 +; CHECK-NEXT: ret + %c = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> + ret <16 x i16> %c +} + +;; xvpackod.w +define <8 x i32> @shufflevector_pack_od_v8i32(<8 x i32> %a, <8 x i32> %b) { +; CHECK-LABEL: shufflevector_pack_od_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: xvpackod.w $xr0, $xr1, $xr0 +; CHECK-NEXT: ret + %c = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> + ret <8 x i32> %c +} + +;; xvpickod.d/xvpackod.d/xvilvh.d +define <4 x i64> @shufflodector_pack_od_v4i64(<4 x i64> %a, <4 x i64> %b) { +; CHECK-LABEL: shufflodector_pack_od_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: xvpackod.d $xr0, $xr1, $xr0 +; CHECK-NEXT: ret + %c = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> + ret <4 x i64> %c +} + +;; xvpackod.w +define <8 x float> @shufflodector_pack_od_v8f32(<8 x float> %a, <8 x float> %b) { +; CHECK-LABEL: shufflodector_pack_od_v8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: xvpackod.w $xr0, $xr1, $xr0 +; CHECK-NEXT: ret + %c = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> + ret <8 x float> %c +} + +;; xvpickod.d/xvpackod.d/xvilvh.d +define <4 x double> @shufflodector_pack_od_v4f64(<4 x double> %a, <4 x double> %b) { +; CHECK-LABEL: shufflodector_pack_od_v4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: xvpackod.d $xr0, $xr1, $xr0 +; CHECK-NEXT: ret + %c = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> + ret <4 x double> %c +} diff --git 
a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvpick.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvpick.ll new file mode 100644 index 0000000000000000000000000000000000000000..294d292d1764067c80a79af83c6aeb87f6982d53 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvpick.ll @@ -0,0 +1,84 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc --mtriple=loongarch64 --mattr=+lasx %s -o - | FileCheck %s + +;; xvpickev.b +define <32 x i8> @shufflevector_pick_ev_v32i8(<32 x i8> %a, <32 x i8> %b) { +; CHECK-LABEL: shufflevector_pick_ev_v32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: xvpickev.b $xr0, $xr1, $xr0 +; CHECK-NEXT: ret + %c = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> + ret <32 x i8> %c +} + +;; xvpickev.h +define <16 x i16> @shufflevector_pick_ev_v16i16(<16 x i16> %a, <16 x i16> %b) { +; CHECK-LABEL: shufflevector_pick_ev_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: xvpickev.h $xr0, $xr1, $xr0 +; CHECK-NEXT: ret + %c = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> + ret <16 x i16> %c +} + +;; xvpickev.w +define <8 x i32> @shufflevector_pick_ev_v8i32(<8 x i32> %a, <8 x i32> %b) { +; CHECK-LABEL: shufflevector_pick_ev_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: xvpickev.w $xr0, $xr1, $xr0 +; CHECK-NEXT: ret + %c = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> + ret <8 x i32> %c +} + +;; xvpickev.w +define <8 x float> @shufflevector_pick_ev_v8f32(<8 x float> %a, <8 x float> %b) { +; CHECK-LABEL: shufflevector_pick_ev_v8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: xvpickev.w $xr0, $xr1, $xr0 +; CHECK-NEXT: ret + %c = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> + ret <8 x float> %c +} + +;; xvpickod.b +define <32 x i8> @shufflevector_pick_od_v32i8(<32 x i8> %a, <32 x i8> %b) { +; CHECK-LABEL: shufflevector_pick_od_v32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: xvpickod.b $xr0, $xr1, $xr0 +; CHECK-NEXT: ret + %c = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> + ret <32 x i8> %c +} + +;; xvpickod.h +define <16 x i16> @shufflevector_pick_od_v16i16(<16 x i16> %a, <16 x i16> %b) { +; CHECK-LABEL: shufflevector_pick_od_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: xvpickod.h $xr0, $xr1, $xr0 +; CHECK-NEXT: ret + %c = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> + ret <16 x i16> %c +} + +;; xvpickod.w +define <8 x i32> @shufflevector_pick_od_v8i32(<8 x i32> %a, <8 x i32> %b) { +; CHECK-LABEL: shufflevector_pick_od_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: xvpickod.w $xr0, $xr1, $xr0 +; CHECK-NEXT: ret + %c = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> + ret <8 x i32> %c +} + +;; xvpickod.w +define <8 x float> @shufflodector_pick_od_v8f32(<8 x float> %a, <8 x float> %b) { +; CHECK-LABEL: shufflodector_pick_od_v8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: xvpickod.w $xr0, $xr1, $xr0 +; CHECK-NEXT: ret + %c = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> + ret <8 x float> %c +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvrepl128vei.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvrepl128vei.ll new file mode 100644 index 0000000000000000000000000000000000000000..dce1e4b777e291c95482d441dcc14900f18e86bf --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvrepl128vei.ll @@ -0,0 +1,65 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc --mtriple=loongarch64 --mattr=+lasx %s -o - | FileCheck %s + +;; 
xvrepl128vei.b +define <32 x i8> @shufflevector_v32i8(<32 x i8> %a, <32 x i8> %b) { +; CHECK-LABEL: shufflevector_v32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: xvrepl128vei.b $xr0, $xr0, 1 +; CHECK-NEXT: ret + %c = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> + ret <32 x i8> %c +} + +;; xvrepl128vei.h +define <16 x i16> @shufflevector_v16i16(<16 x i16> %a, <16 x i16> %b) { +; CHECK-LABEL: shufflevector_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: xvrepl128vei.h $xr0, $xr0, 3 +; CHECK-NEXT: ret + %c = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> + ret <16 x i16> %c +} + +;; xvrepl128vei.w +define <8 x i32> @shufflevector_v8i32(<8 x i32> %a, <8 x i32> %b) { +; CHECK-LABEL: shufflevector_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: xvpermi.d $xr0, $xr0, 78 +; CHECK-NEXT: xvrepl128vei.w $xr0, $xr0, 3 +; CHECK-NEXT: ret + %c = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> + ret <8 x i32> %c +} + +;; xvrepl128vei.d +define <4 x i64> @shufflevector_v4i64(<4 x i64> %a, <4 x i64> %b) { +; CHECK-LABEL: shufflevector_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: xvrepl128vei.d $xr0, $xr0, 1 +; CHECK-NEXT: ret + %c = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> + ret <4 x i64> %c +} + +;; xvrepl128vei.w +define <8 x float> @shufflevector_v8f32(<8 x float> %a, <8 x float> %b) { +; CHECK-LABEL: shufflevector_v8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: xvrepl128vei.w $xr0, $xr0, 3 +; CHECK-NEXT: ret + %c = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> + ret <8 x float> %c +} + +;; xvrepl128vei.d +define <4 x double> @shufflevector_v4f64(<4 x double> %a, <4 x double> %b) { +; CHECK-LABEL: shufflevector_v4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: xvrepl128vei.d $xr0, $xr1, 1 +; CHECK-NEXT: ret + %c = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> + ret <4 x double> %c +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvshuf.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvshuf.ll new file mode 100644 index 0000000000000000000000000000000000000000..9c8afd1e626af2fdabaee88a7061d2b867f9dfb0 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvshuf.ll @@ -0,0 +1,76 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx %s -o - | FileCheck %s + +;; xvshuf.b +define <32 x i8> @shufflevector_v32i8(<32 x i8> %a, <32 x i8> %b) { +; CHECK-LABEL: shufflevector_v32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI0_0) +; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI0_0) +; CHECK-NEXT: xvld $xr2, $a0, 0 +; CHECK-NEXT: xvshuf.b $xr0, $xr1, $xr0, $xr2 +; CHECK-NEXT: ret + %c = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> + ret <32 x i8> %c +} + +;; xvshuf.h +define <16 x i16> @shufflevector_v16i16(<16 x i16> %a, <16 x i16> %b) { +; CHECK-LABEL: shufflevector_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: xvpermi.d $xr2, $xr0, 78 +; CHECK-NEXT: xvpermi.d $xr1, $xr1, 78 +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI1_0) +; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI1_0) +; CHECK-NEXT: xvld $xr0, $a0, 0 +; CHECK-NEXT: xvshuf.h $xr0, $xr1, $xr2 +; CHECK-NEXT: ret + %c = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> + ret <16 x i16> %c +} + +;; xvshuf.w +define <8 x i32> @shufflevector_v8i32(<8 x i32> %a, <8 x i32> %b) { +; CHECK-LABEL: shufflevector_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: xvpermi.d $xr2, $xr0, 68 +; CHECK-NEXT: xvpermi.d $xr1, $xr1, 68 +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI2_0) +; CHECK-NEXT: addi.d $a0, 
$a0, %pc_lo12(.LCPI2_0) +; CHECK-NEXT: xvld $xr0, $a0, 0 +; CHECK-NEXT: xvshuf.w $xr0, $xr1, $xr2 +; CHECK-NEXT: ret + %c = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> + ret <8 x i32> %c +} + +;; xvshuf.d +define <4 x i64> @shufflevector_v4i64(<4 x i64> %a, <4 x i64> %b) { +; CHECK-LABEL: shufflevector_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: xvpermi.d $xr2, $xr0, 238 +; CHECK-NEXT: xvpermi.d $xr1, $xr1, 238 +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0) +; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI3_0) +; CHECK-NEXT: xvld $xr0, $a0, 0 +; CHECK-NEXT: xvshuf.d $xr0, $xr1, $xr2 +; CHECK-NEXT: ret + %c = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> + ret <4 x i64> %c +} + +;; xvshuf.w +define <8 x float> @shufflevector_v8f32(<8 x float> %a, <8 x float> %b) { +; CHECK-LABEL: shufflevector_v8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI4_0) +; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI4_0) +; CHECK-NEXT: xvld $xr2, $a0, 0 +; CHECK-NEXT: xvshuf.w $xr2, $xr1, $xr0 +; CHECK-NEXT: xvori.b $xr0, $xr2, 0 +; CHECK-NEXT: ret + %c = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> + ret <8 x float> %c +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvshuf4i.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvshuf4i.ll new file mode 100644 index 0000000000000000000000000000000000000000..dc4532a7292abb660a17bc031afa9372c066281a --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/shuffle-as-xvshuf4i.ll @@ -0,0 +1,43 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc --mtriple=loongarch64 --mattr=+lasx %s -o - | FileCheck %s + +;; xxvshuf4i.b +define <32 x i8> @shufflevector_xvshuf4i_v32i8(<32 x i8> %a, <32 x i8> %b) { +; CHECK-LABEL: shufflevector_xvshuf4i_v32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: xvshuf4i.b $xr0, $xr0, 27 +; CHECK-NEXT: ret + %c = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> + ret <32 x i8> %c +} + +;; xvshuf4i.h +define <16 x i16> @shufflevector_xvshuf4i_v16i16(<16 x i16> %a, <16 x i16> %b) { +; CHECK-LABEL: shufflevector_xvshuf4i_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: xvshuf4i.h $xr0, $xr0, 27 +; CHECK-NEXT: ret + %c = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> + ret <16 x i16> %c +} + +;; xvshuf4i.w +define <8 x i32> @shufflevector_xvshuf4i_v8i32(<8 x i32> %a, <8 x i32> %b) { +; CHECK-LABEL: shufflevector_xvshuf4i_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: xvshuf4i.w $xr0, $xr0, 27 +; CHECK-NEXT: ret + %c = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> + ret <8 x i32> %c +} + +;; xvshuf4i.w +define <8 x float> @shufflevector_xvshuf4i_v8f32(<8 x float> %a, <8 x float> %b) { +; CHECK-LABEL: shufflevector_xvshuf4i_v8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: xvshuf4i.w $xr0, $xr0, 27 +; CHECK-NEXT: ret + %c = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> + ret <8 x float> %c +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sitofp.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sitofp.ll new file mode 100644 index 0000000000000000000000000000000000000000..208a758ea4e9a2212c8b44531a9070370d58c2a1 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sitofp.ll @@ -0,0 +1,57 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +define void @sitofp_v8i32_v8f32(ptr %res, ptr %in){ +; CHECK-LABEL: sitofp_v8i32_v8f32: +; CHECK: # %bb.0: +; 
CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvffint.s.w $xr0, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x i32>, ptr %in + %v1 = sitofp <8 x i32> %v0 to <8 x float> + store <8 x float> %v1, ptr %res + ret void +} + +define void @sitofp_v4f64_v4f64(ptr %res, ptr %in){ +; CHECK-LABEL: sitofp_v4f64_v4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvffint.d.l $xr0, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x i64>, ptr %in + %v1 = sitofp <4 x i64> %v0 to <4 x double> + store <4 x double> %v1, ptr %res + ret void +} + +define void @sitofp_v4i64_v4f32(ptr %res, ptr %in){ +; CHECK-LABEL: sitofp_v4i64_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvffint.d.l $xr0, $xr0 +; CHECK-NEXT: xvpermi.d $xr1, $xr0, 238 +; CHECK-NEXT: xvfcvt.s.d $xr0, $xr1, $xr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x i64>, ptr %in + %v1 = sitofp <4 x i64> %v0 to <4 x float> + store <4 x float> %v1, ptr %res + ret void +} + +define void @sitofp_v4i32_v4f64(ptr %res, ptr %in){ +; CHECK-LABEL: sitofp_v4i32_v4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vext2xv.d.w $xr0, $xr0 +; CHECK-NEXT: xvffint.d.l $xr0, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x i32>, ptr %in + %v1 = sitofp <4 x i32> %v0 to <4 x double> + store <4 x double> %v1, ptr %res + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sub.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sub.ll new file mode 100644 index 0000000000000000000000000000000000000000..bcfff16514770f357a7d2502ffb406af8fe371df --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/sub.ll @@ -0,0 +1,122 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +define void @sub_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: sub_v32i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvsub.b $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <32 x i8>, ptr %a0 + %v1 = load <32 x i8>, ptr %a1 + %v2 = sub <32 x i8> %v0, %v1 + store <32 x i8> %v2, ptr %res + ret void +} + +define void @sub_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: sub_v16i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvsub.h $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <16 x i16>, ptr %a0 + %v1 = load <16 x i16>, ptr %a1 + %v2 = sub <16 x i16> %v0, %v1 + store <16 x i16> %v2, ptr %res + ret void +} + +define void @sub_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: sub_v8i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvsub.w $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <8 x i32>, ptr %a0 + %v1 = load <8 x i32>, ptr %a1 + %v2 = sub <8 x i32> %v0, %v1 + store <8 x i32> %v2, ptr %res + ret void +} + +define void @sub_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: sub_v4i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvsub.d $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <4 x i64>, ptr %a0 + %v1 = load 
<4 x i64>, ptr %a1 + %v2 = sub <4 x i64> %v0, %v1 + store <4 x i64> %v2, ptr %res + ret void +} + +define void @sub_v32i8_31(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: sub_v32i8_31: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvsubi.bu $xr0, $xr0, 31 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <32 x i8>, ptr %a0 + %v1 = sub <32 x i8> %v0, <i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31> + store <32 x i8> %v1, ptr %res + ret void +} + +define void @sub_v16i16_31(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: sub_v16i16_31: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvsubi.hu $xr0, $xr0, 31 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <16 x i16>, ptr %a0 + %v1 = sub <16 x i16> %v0, <i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31> + store <16 x i16> %v1, ptr %res + ret void +} + +define void @sub_v8i32_31(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: sub_v8i32_31: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvsubi.wu $xr0, $xr0, 31 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <8 x i32>, ptr %a0 + %v1 = sub <8 x i32> %v0, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31> + store <8 x i32> %v1, ptr %res + ret void +} + +define void @sub_v4i64_31(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: sub_v4i64_31: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvsubi.du $xr0, $xr0, 31 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <4 x i64>, ptr %a0 + %v1 = sub <4 x i64> %v0, <i64 31, i64 31, i64 31, i64 31> + store <4 x i64> %v1, ptr %res + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/udiv.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/udiv.ll new file mode 100644 index 0000000000000000000000000000000000000000..e78084c7186d338f4c8108bc482f974bd97345e4 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/udiv.ll @@ -0,0 +1,122 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +define void @udiv_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: udiv_v32i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvdiv.bu $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <32 x i8>, ptr %a0 + %v1 = load <32 x i8>, ptr %a1 + %v2 = udiv <32 x i8> %v0, %v1 + store <32 x i8> %v2, ptr %res + ret void +} + +define void @udiv_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: udiv_v16i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvdiv.hu $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <16 x i16>, ptr %a0 + %v1 = load <16 x i16>, ptr %a1 + %v2 = udiv <16 x i16> %v0, %v1 + store <16 x i16> %v2, ptr %res + ret void +} + +define void @udiv_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: udiv_v8i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvdiv.wu $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <8 x i32>, ptr %a0 + %v1 = load <8 x i32>, ptr %a1 + %v2 = udiv <8 x i32> %v0, %v1 + store <8 x i32> %v2, ptr %res + ret void +} + +define void @udiv_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: udiv_v4i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvdiv.du $xr0, $xr1, $xr0 +; 
CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <4 x i64>, ptr %a0 + %v1 = load <4 x i64>, ptr %a1 + %v2 = udiv <4 x i64> %v0, %v1 + store <4 x i64> %v2, ptr %res + ret void +} + +define void @udiv_v32i8_8(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: udiv_v32i8_8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvsrli.b $xr0, $xr0, 3 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <32 x i8>, ptr %a0 + %v1 = udiv <32 x i8> %v0, <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8> + store <32 x i8> %v1, ptr %res + ret void +} + +define void @udiv_v16i16_8(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: udiv_v16i16_8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvsrli.h $xr0, $xr0, 3 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <16 x i16>, ptr %a0 + %v1 = udiv <16 x i16> %v0, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> + store <16 x i16> %v1, ptr %res + ret void +} + +define void @udiv_v8i32_8(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: udiv_v8i32_8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvsrli.w $xr0, $xr0, 3 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <8 x i32>, ptr %a0 + %v1 = udiv <8 x i32> %v0, <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8> + store <8 x i32> %v1, ptr %res + ret void +} + +define void @udiv_v4i64_8(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: udiv_v4i64_8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvsrli.d $xr0, $xr0, 3 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <4 x i64>, ptr %a0 + %v1 = udiv <4 x i64> %v0, <i64 8, i64 8, i64 8, i64 8> + store <4 x i64> %v1, ptr %res + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/uitofp.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/uitofp.ll new file mode 100644 index 0000000000000000000000000000000000000000..70cf71c4cec218c2b8f892573431ac6add4ddf59 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/uitofp.ll @@ -0,0 +1,57 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +define void @uitofp_v8i32_v8f32(ptr %res, ptr %in){ +; CHECK-LABEL: uitofp_v8i32_v8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvffint.s.wu $xr0, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x i32>, ptr %in + %v1 = uitofp <8 x i32> %v0 to <8 x float> + store <8 x float> %v1, ptr %res + ret void +} + +define void @uitofp_v4f64_v4f64(ptr %res, ptr %in){ +; CHECK-LABEL: uitofp_v4f64_v4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvffint.d.lu $xr0, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x i64>, ptr %in + %v1 = uitofp <4 x i64> %v0 to <4 x double> + store <4 x double> %v1, ptr %res + ret void +} + +define void @uitofp_v4i64_v4f32(ptr %res, ptr %in){ +; CHECK-LABEL: uitofp_v4i64_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvffint.d.lu $xr0, $xr0 +; CHECK-NEXT: xvpermi.d $xr1, $xr0, 238 +; CHECK-NEXT: xvfcvt.s.d $xr0, $xr1, $xr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x i64>, ptr %in + %v1 = uitofp <4 x i64> %v0 to <4 x float> + store <4 x float> %v1, ptr %res + ret void +} + +define void @uitofp_v4i32_v4f64(ptr %res, ptr %in){ +; CHECK-LABEL: uitofp_v4i32_v4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vext2xv.du.wu $xr0, $xr0 +; CHECK-NEXT: xvffint.d.lu $xr0, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: 
ret + %v0 = load <4 x i32>, ptr %in + %v1 = uitofp <4 x i32> %v0 to <4 x double> + store <4 x double> %v1, ptr %res + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/xor.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/xor.ll new file mode 100644 index 0000000000000000000000000000000000000000..c2fb1462b7a2501a4cf632bdf352cdd76a81d950 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/xor.ll @@ -0,0 +1,125 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +define void @xor_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: xor_v32i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvxor.v $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <32 x i8>, ptr %a0 + %v1 = load <32 x i8>, ptr %a1 + %v2 = xor <32 x i8> %v0, %v1 + store <32 x i8> %v2, ptr %res + ret void +} + +define void @xor_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: xor_v16i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvxor.v $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <16 x i16>, ptr %a0 + %v1 = load <16 x i16>, ptr %a1 + %v2 = xor <16 x i16> %v0, %v1 + store <16 x i16> %v2, ptr %res + ret void +} + +define void @xor_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: xor_v8i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvxor.v $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <8 x i32>, ptr %a0 + %v1 = load <8 x i32>, ptr %a1 + %v2 = xor <8 x i32> %v0, %v1 + store <8 x i32> %v2, ptr %res + ret void +} + +define void @xor_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: xor_v4i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvxor.v $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <4 x i64>, ptr %a0 + %v1 = load <4 x i64>, ptr %a1 + %v2 = xor <4 x i64> %v0, %v1 + store <4 x i64> %v2, ptr %res + ret void +} + +define void @xor_u_v32i8(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: xor_u_v32i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvxori.b $xr0, $xr0, 31 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <32 x i8>, ptr %a0 + %v1 = xor <32 x i8> %v0, <i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31> + store <32 x i8> %v1, ptr %res + ret void +} + +define void @xor_u_v16i16(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: xor_u_v16i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvrepli.h $xr1, 31 +; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <16 x i16>, ptr %a0 + %v1 = xor <16 x i16> %v0, <i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31> + store <16 x i16> %v1, ptr %res + ret void +} + +define void @xor_u_v8i32(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: xor_u_v8i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvrepli.w $xr1, 31 +; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <8 x i32>, ptr %a0 + %v1 = xor <8 x i32> %v0, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31> + store <8 x i32> %v1, ptr %res + ret void +} + +define void @xor_u_v4i64(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: 
xor_u_v4i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvrepli.d $xr1, 31 +; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <4 x i64>, ptr %a0 + %v1 = xor <4 x i64> %v0, <i64 31, i64 31, i64 31, i64 31> + store <4 x i64> %v1, ptr %res + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/mulh.ll b/llvm/test/CodeGen/LoongArch/lasx/mulh.ll new file mode 100644 index 0000000000000000000000000000000000000000..aac711a4a371ca9cc50c4ffc5be43ac860df0e51 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/mulh.ll @@ -0,0 +1,162 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +define void @mulhs_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: mulhs_v32i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvmuh.b $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <32 x i8>, ptr %a0 + %v1 = load <32 x i8>, ptr %a1 + %v0s = sext <32 x i8> %v0 to <32 x i16> + %v1s = sext <32 x i8> %v1 to <32 x i16> + %m = mul <32 x i16> %v0s, %v1s + %s = ashr <32 x i16> %m, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> + %v2 = trunc <32 x i16> %s to <32 x i8> + store <32 x i8> %v2, ptr %res + ret void +} + +define void @mulhu_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: mulhu_v32i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvmuh.bu $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <32 x i8>, ptr %a0 + %v1 = load <32 x i8>, ptr %a1 + %v0z = zext <32 x i8> %v0 to <32 x i16> + %v1z = zext <32 x i8> %v1 to <32 x i16> + %m = mul <32 x i16> %v0z, %v1z + %s = lshr <32 x i16> %m, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> + %v2 = trunc <32 x i16> %s to <32 x i8> + store <32 x i8> %v2, ptr %res + ret void +} + +define void @mulhs_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: mulhs_v16i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvmuh.h $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <16 x i16>, ptr %a0 + %v1 = load <16 x i16>, ptr %a1 + %v0s = sext <16 x i16> %v0 to <16 x i32> + %v1s = sext <16 x i16> %v1 to <16 x i32> + %m = mul <16 x i32> %v0s, %v1s + %s = ashr <16 x i32> %m, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16> + %v2 = trunc <16 x i32> %s to <16 x i16> + store <16 x i16> %v2, ptr %res + ret void +} + +define void @mulhu_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: mulhu_v16i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvmuh.hu $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <16 x i16>, ptr %a0 + %v1 = load <16 x i16>, ptr %a1 + %v0z = zext <16 x i16> %v0 to <16 x i32> + %v1z = zext <16 x i16> %v1 to <16 x i32> + %m = mul <16 x i32> %v0z, %v1z + %s = lshr <16 x i32> %m, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16> + %v2 = trunc <16 x i32> %s to <16 x i16> + store <16 x i16> %v2, ptr %res + ret void +} + +define void @mulhs_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: mulhs_v8i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvmuh.w $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <8 x i32>, ptr %a0 + %v1 = load <8 x i32>, ptr %a1 + %v0s = sext <8 x i32> %v0 to <8 x i64> + %v1s = sext <8 x i32> %v1 to <8 x i64> + %m = mul <8 
x i64> %v0s, %v1s + %s = ashr <8 x i64> %m, <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32> + %v2 = trunc <8 x i64> %s to <8 x i32> + store <8 x i32> %v2, ptr %res + ret void +} + +define void @mulhu_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: mulhu_v8i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvmuh.wu $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <8 x i32>, ptr %a0 + %v1 = load <8 x i32>, ptr %a1 + %v0z = zext <8 x i32> %v0 to <8 x i64> + %v1z = zext <8 x i32> %v1 to <8 x i64> + %m = mul <8 x i64> %v0z, %v1z + %s = lshr <8 x i64> %m, <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32> + %v2 = trunc <8 x i64> %s to <8 x i32> + store <8 x i32> %v2, ptr %res + ret void +} + +define void @mulhs_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: mulhs_v4i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvmuh.d $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <4 x i64>, ptr %a0 + %v1 = load <4 x i64>, ptr %a1 + %v0s = sext <4 x i64> %v0 to <4 x i128> + %v1s = sext <4 x i64> %v1 to <4 x i128> + %m = mul <4 x i128> %v0s, %v1s + %s = ashr <4 x i128> %m, <i128 64, i128 64, i128 64, i128 64> + %v2 = trunc <4 x i128> %s to <4 x i64> + store <4 x i64> %v2, ptr %res + ret void +} + +define void @mulhu_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: mulhu_v4i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a2, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvmuh.du $xr0, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <4 x i64>, ptr %a0 + %v1 = load <4 x i64>, ptr %a1 + %v0z = zext <4 x i64> %v0 to <4 x i128> + %v1z = zext <4 x i64> %v1 to <4 x i128> + %m = mul <4 x i128> %v0z, %v1z + %s = lshr <4 x i128> %m, <i128 64, i128 64, i128 64, i128 64> + %v2 = trunc <4 x i128> %s to <4 x i64> + store <4 x i64> %v2, ptr %res + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/vselect.ll b/llvm/test/CodeGen/LoongArch/lasx/vselect.ll new file mode 100644 index 0000000000000000000000000000000000000000..ec2fc28db33cc7ae13ab565fdd060a1f14ba7d58 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/vselect.ll @@ -0,0 +1,86 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +define void @select_v32i8_imm(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: select_v32i8_imm: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvrepli.h $xr1, -256 +; CHECK-NEXT: xvbitseli.b $xr1, $xr0, 1 +; CHECK-NEXT: xvst $xr1, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <32 x i8>, ptr %a0 + %sel = select <32 x i1> , <32 x i8> , <32 x i8> %v0 + store <32 x i8> %sel, ptr %res + ret void +} + +define void @select_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: select_v32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvld $xr1, $a2, 0 +; CHECK-NEXT: xvrepli.h $xr2, -256 +; CHECK-NEXT: xvbitsel.v $xr0, $xr1, $xr0, $xr2 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <32 x i8>, ptr %a0 + %v1 = load <32 x i8>, ptr %a1 + %sel = select <32 x i1> , <32 x i8> %v0, <32 x i8> %v1 + store <32 x i8> %sel, ptr %res + ret void +} + +define void @select_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: select_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvld $xr1, $a2, 0 +; CHECK-NEXT: lu12i.w $a1, -16 +; CHECK-NEXT: lu32i.d $a1, 0 +; CHECK-NEXT: xvreplgr2vr.w $xr2, $a1 +; CHECK-NEXT: xvbitsel.v $xr0, $xr1, $xr0, 
$xr2 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <16 x i16>, ptr %a0 + %v1 = load <16 x i16>, ptr %a1 + %sel = select <16 x i1> , <16 x i16> %v0, <16 x i16> %v1 + store <16 x i16> %sel, ptr %res + ret void +} + +define void @select_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: select_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvld $xr1, $a2, 0 +; CHECK-NEXT: ori $a1, $zero, 0 +; CHECK-NEXT: lu32i.d $a1, -1 +; CHECK-NEXT: xvreplgr2vr.d $xr2, $a1 +; CHECK-NEXT: xvbitsel.v $xr0, $xr1, $xr0, $xr2 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x i32>, ptr %a0 + %v1 = load <8 x i32>, ptr %a1 + %sel = select <8 x i1> , <8 x i32> %v0, <8 x i32> %v1 + store <8 x i32> %sel, ptr %res + ret void +} + +define void @select_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: select_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: pcalau12i $a3, %pc_hi20(.LCPI4_0) +; CHECK-NEXT: addi.d $a3, $a3, %pc_lo12(.LCPI4_0) +; CHECK-NEXT: xvld $xr0, $a3, 0 +; CHECK-NEXT: xvld $xr1, $a1, 0 +; CHECK-NEXT: xvld $xr2, $a2, 0 +; CHECK-NEXT: xvbitsel.v $xr0, $xr2, $xr1, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x i64>, ptr %a0 + %v1 = load <4 x i64>, ptr %a1 + %sel = select <4 x i1> , <4 x i64> %v0, <4 x i64> %v1 + store <4 x i64> %sel, ptr %res + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll b/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll new file mode 100644 index 0000000000000000000000000000000000000000..ed1f610a5fa61515f64eafd01230ebda1a65c1c9 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll @@ -0,0 +1,398 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +define void @buildvector_v16i8_splat(ptr %dst, i8 %a0) nounwind { +; CHECK-LABEL: buildvector_v16i8_splat: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vreplgr2vr.b $vr0, $a1 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %insert = insertelement <16 x i8> undef, i8 %a0, i8 0 + %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer + store <16 x i8> %splat, ptr %dst + ret void +} + +define void @buildvector_v8i16_splat(ptr %dst, i16 %a0) nounwind { +; CHECK-LABEL: buildvector_v8i16_splat: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vreplgr2vr.h $vr0, $a1 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %insert = insertelement <8 x i16> undef, i16 %a0, i8 0 + %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer + store <8 x i16> %splat, ptr %dst + ret void +} + +define void @buildvector_v4i32_splat(ptr %dst, i32 %a0) nounwind { +; CHECK-LABEL: buildvector_v4i32_splat: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vreplgr2vr.w $vr0, $a1 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %insert = insertelement <4 x i32> undef, i32 %a0, i8 0 + %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer + store <4 x i32> %splat, ptr %dst + ret void +} + +define void @buildvector_v2i64_splat(ptr %dst, i64 %a0) nounwind { +; CHECK-LABEL: buildvector_v2i64_splat: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vreplgr2vr.d $vr0, $a1 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %insert = insertelement <2 x i64> undef, i64 %a0, i8 0 + %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer + store <2 x i64> %splat, ptr %dst + ret void 
+} + +define void @buildvector_v4f32_splat(ptr %dst, float %a0) nounwind { +; CHECK-LABEL: buildvector_v4f32_splat: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $f0 killed $f0 def $vr0 +; CHECK-NEXT: vreplvei.w $vr0, $vr0, 0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %insert = insertelement <4 x float> undef, float %a0, i8 0 + %splat = shufflevector <4 x float> %insert, <4 x float> undef, <4 x i32> zeroinitializer + store <4 x float> %splat, ptr %dst + ret void +} + +define void @buildvector_v2f64_splat(ptr %dst, double %a0) nounwind { +; CHECK-LABEL: buildvector_v2f64_splat: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0 +; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %insert = insertelement <2 x double> undef, double %a0, i8 0 + %splat = shufflevector <2 x double> %insert, <2 x double> undef, <2 x i32> zeroinitializer + store <2 x double> %splat, ptr %dst + ret void +} + +define void @buildvector_v16i8_const_splat(ptr %dst) nounwind { +; CHECK-LABEL: buildvector_v16i8_const_splat: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vrepli.b $vr0, 1 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + store <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, ptr %dst + ret void +} + +define void @buildvector_v8i16_const_splat(ptr %dst) nounwind { +; CHECK-LABEL: buildvector_v8i16_const_splat: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vrepli.h $vr0, 1 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + store <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, ptr %dst + ret void +} + +define void @buildvector_v4i32_const_splat(ptr %dst) nounwind { +; CHECK-LABEL: buildvector_v4i32_const_splat: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vrepli.w $vr0, 1 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + store <4 x i32> <i32 1, i32 1, i32 1, i32 1>, ptr %dst + ret void +} + +define void @buildvector_v2i64_const_splat(ptr %dst) nounwind { +; CHECK-LABEL: buildvector_v2i64_const_splat: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vrepli.d $vr0, 1 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + store <2 x i64> <i64 1, i64 1>, ptr %dst + ret void +} + +define void @buildvector_v2f32_const_splat(ptr %dst) nounwind { +; CHECK-LABEL: buildvector_v2f32_const_splat: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lu12i.w $a1, 260096 +; CHECK-NEXT: vreplgr2vr.w $vr0, $a1 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + store <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, ptr %dst + ret void +} + +define void @buildvector_v2f64_const_splat(ptr %dst) nounwind { +; CHECK-LABEL: buildvector_v2f64_const_splat: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lu52i.d $a1, $zero, 1023 +; CHECK-NEXT: vreplgr2vr.d $vr0, $a1 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + store <2 x double> <double 1.000000e+00, double 1.000000e+00>, ptr %dst + ret void +} + +define void @buildvector_v16i8_const(ptr %dst) nounwind { +; CHECK-LABEL: buildvector_v16i8_const: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI12_0) +; CHECK-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI12_0) +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + store <16 x i8> , ptr %dst + ret void +} + +define void @buildvector_v8i16_const(ptr %dst) nounwind { +; CHECK-LABEL: buildvector_v8i16_const: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI13_0) +; CHECK-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI13_0) +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + store <8 x i16> , ptr %dst + ret void +} + +define void @buildvector_v4i32_const(ptr %dst) nounwind { +; 
CHECK-LABEL: buildvector_v4i32_const: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI14_0) +; CHECK-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI14_0) +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + store <4 x i32> , ptr %dst + ret void +} + +define void @buildvector_v2i64_const(ptr %dst) nounwind { +; CHECK-LABEL: buildvector_v2i64_const: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI15_0) +; CHECK-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI15_0) +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + store <2 x i64> , ptr %dst + ret void +} + +define void @buildvector_v2f32_const(ptr %dst) nounwind { +; CHECK-LABEL: buildvector_v2f32_const: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI16_0) +; CHECK-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI16_0) +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + store <4 x float> , ptr %dst + ret void +} + +define void @buildvector_v2f64_const(ptr %dst) nounwind { +; CHECK-LABEL: buildvector_v2f64_const: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI17_0) +; CHECK-NEXT: addi.d $a1, $a1, %pc_lo12(.LCPI17_0) +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + store <2 x double> , ptr %dst + ret void +} + +define void @buildvector_v16i8(ptr %dst, i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7, i8 %a8, i8 %a9, i8 %a10, i8 %a11, i8 %a12, i8 %a13, i8 %a14, i8 %a15) nounwind { +; CHECK-LABEL: buildvector_v16i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 0 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a2, 1 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a3, 2 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a4, 3 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a5, 4 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a6, 5 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a7, 6 +; CHECK-NEXT: ld.b $a1, $sp, 0 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 7 +; CHECK-NEXT: ld.b $a1, $sp, 8 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 8 +; CHECK-NEXT: ld.b $a1, $sp, 16 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 9 +; CHECK-NEXT: ld.b $a1, $sp, 24 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 10 +; CHECK-NEXT: ld.b $a1, $sp, 32 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 11 +; CHECK-NEXT: ld.b $a1, $sp, 40 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 12 +; CHECK-NEXT: ld.b $a1, $sp, 48 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 13 +; CHECK-NEXT: ld.b $a1, $sp, 56 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 14 +; CHECK-NEXT: ld.b $a1, $sp, 64 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 15 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %ins0 = insertelement <16 x i8> undef, i8 %a0, i32 0 + %ins1 = insertelement <16 x i8> %ins0, i8 %a1, i32 1 + %ins2 = insertelement <16 x i8> %ins1, i8 %a2, i32 2 + %ins3 = insertelement <16 x i8> %ins2, i8 %a3, i32 3 + %ins4 = insertelement <16 x i8> %ins3, i8 %a4, i32 4 + %ins5 = insertelement <16 x i8> %ins4, i8 %a5, i32 5 + %ins6 = insertelement <16 x i8> %ins5, i8 %a6, i32 6 + %ins7 = insertelement <16 x i8> %ins6, i8 %a7, i32 7 + %ins8 = insertelement <16 x i8> %ins7, i8 %a8, i32 8 + %ins9 = insertelement <16 x i8> %ins8, i8 %a9, i32 9 + %ins10 = insertelement <16 x i8> %ins9, i8 %a10, i32 10 + %ins11 = insertelement <16 x i8> %ins10, i8 %a11, i32 11 + %ins12 = insertelement <16 x i8> %ins11, i8 %a12, i32 12 + %ins13 = insertelement <16 x i8> %ins12, i8 %a13, i32 13 + %ins14 = insertelement <16 x i8> %ins13, i8 %a14, i32 14 + %ins15 = insertelement <16 x i8> %ins14, i8 %a15, 
i32 15 + store <16 x i8> %ins15, ptr %dst + ret void +} + +define void @buildvector_v8i16(ptr %dst, i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4, i16 %a5, i16 %a6, i16 %a7) nounwind { +; CHECK-LABEL: buildvector_v8i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 0 +; CHECK-NEXT: vinsgr2vr.h $vr0, $a2, 1 +; CHECK-NEXT: vinsgr2vr.h $vr0, $a3, 2 +; CHECK-NEXT: vinsgr2vr.h $vr0, $a4, 3 +; CHECK-NEXT: vinsgr2vr.h $vr0, $a5, 4 +; CHECK-NEXT: vinsgr2vr.h $vr0, $a6, 5 +; CHECK-NEXT: vinsgr2vr.h $vr0, $a7, 6 +; CHECK-NEXT: ld.h $a1, $sp, 0 +; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 7 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %ins0 = insertelement <8 x i16> undef, i16 %a0, i32 0 + %ins1 = insertelement <8 x i16> %ins0, i16 %a1, i32 1 + %ins2 = insertelement <8 x i16> %ins1, i16 %a2, i32 2 + %ins3 = insertelement <8 x i16> %ins2, i16 %a3, i32 3 + %ins4 = insertelement <8 x i16> %ins3, i16 %a4, i32 4 + %ins5 = insertelement <8 x i16> %ins4, i16 %a5, i32 5 + %ins6 = insertelement <8 x i16> %ins5, i16 %a6, i32 6 + %ins7 = insertelement <8 x i16> %ins6, i16 %a7, i32 7 + store <8 x i16> %ins7, ptr %dst + ret void +} + +define void @buildvector_v4i32(ptr %dst, i32 %a0, i32 %a1, i32 %a2, i32 %a3) nounwind { +; CHECK-LABEL: buildvector_v4i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vinsgr2vr.w $vr0, $a1, 0 +; CHECK-NEXT: vinsgr2vr.w $vr0, $a2, 1 +; CHECK-NEXT: vinsgr2vr.w $vr0, $a3, 2 +; CHECK-NEXT: vinsgr2vr.w $vr0, $a4, 3 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %ins0 = insertelement <4 x i32> undef, i32 %a0, i32 0 + %ins1 = insertelement <4 x i32> %ins0, i32 %a1, i32 1 + %ins2 = insertelement <4 x i32> %ins1, i32 %a2, i32 2 + %ins3 = insertelement <4 x i32> %ins2, i32 %a3, i32 3 + store <4 x i32> %ins3, ptr %dst + ret void +} + +define void @buildvector_v2i64(ptr %dst, i64 %a0, i64 %a1) nounwind { +; CHECK-LABEL: buildvector_v2i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vinsgr2vr.d $vr0, $a1, 0 +; CHECK-NEXT: vinsgr2vr.d $vr0, $a2, 1 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %ins0 = insertelement <2 x i64> undef, i64 %a0, i32 0 + %ins1 = insertelement <2 x i64> %ins0, i64 %a1, i32 1 + store <2 x i64> %ins1, ptr %dst + ret void +} + +define void @buildvector_v4f32(ptr %dst, float %a0, float %a1, float %a2, float %a3) nounwind { +; CHECK-LABEL: buildvector_v4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movfr2gr.s $a1, $fa0 +; CHECK-NEXT: vinsgr2vr.w $vr0, $a1, 0 +; CHECK-NEXT: movfr2gr.s $a1, $fa1 +; CHECK-NEXT: vinsgr2vr.w $vr0, $a1, 1 +; CHECK-NEXT: movfr2gr.s $a1, $fa2 +; CHECK-NEXT: vinsgr2vr.w $vr0, $a1, 2 +; CHECK-NEXT: movfr2gr.s $a1, $fa3 +; CHECK-NEXT: vinsgr2vr.w $vr0, $a1, 3 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %ins0 = insertelement <4 x float> undef, float %a0, i32 0 + %ins1 = insertelement <4 x float> %ins0, float %a1, i32 1 + %ins2 = insertelement <4 x float> %ins1, float %a2, i32 2 + %ins3 = insertelement <4 x float> %ins2, float %a3, i32 3 + store <4 x float> %ins3, ptr %dst + ret void +} + +define void @buildvector_v2f64(ptr %dst, double %a0, double %a1) nounwind { +; CHECK-LABEL: buildvector_v2f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movfr2gr.d $a1, $fa0 +; CHECK-NEXT: vinsgr2vr.d $vr0, $a1, 0 +; CHECK-NEXT: movfr2gr.d $a1, $fa1 +; CHECK-NEXT: vinsgr2vr.d $vr0, $a1, 1 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %ins0 = insertelement <2 x double> undef, double %a0, i32 0 + %ins1 = insertelement <2 x double> %ins0, double %a1, i32 1 + store <2 x double> %ins1, 
ptr %dst + ret void +} + +;; BUILD_VECTOR through stack. +;; If `isShuffleMaskLegal` returns true, it will lead to an infinite loop. +define void @extract1_i32_zext_insert0_i64_undef(ptr %src, ptr %dst) nounwind { +; CHECK-LABEL: extract1_i32_zext_insert0_i64_undef: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.d $sp, $sp, -16 +; CHECK-NEXT: vld $vr0, $a0, 0 +; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 1 +; CHECK-NEXT: bstrpick.d $a0, $a0, 31, 0 +; CHECK-NEXT: st.d $a0, $sp, 0 +; CHECK-NEXT: vld $vr0, $sp, 0 +; CHECK-NEXT: vst $vr0, $a1, 0 +; CHECK-NEXT: addi.d $sp, $sp, 16 +; CHECK-NEXT: ret + %v = load volatile <4 x i32>, ptr %src + %e = extractelement <4 x i32> %v, i32 1 + %z = zext i32 %e to i64 + %r = insertelement <2 x i64> undef, i64 %z, i32 0 + store <2 x i64> %r, ptr %dst + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/ctpop-ctlz.ll b/llvm/test/CodeGen/LoongArch/lsx/ctpop-ctlz.ll new file mode 100644 index 0000000000000000000000000000000000000000..5df553fba7ef773f9eac09cd2e4d5e686c719384 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/ctpop-ctlz.ll @@ -0,0 +1,115 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +define void @ctpop_v16i8(ptr %src, ptr %dst) nounwind { +; CHECK-LABEL: ctpop_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a0, 0 +; CHECK-NEXT: vpcnt.b $vr0, $vr0 +; CHECK-NEXT: vst $vr0, $a1, 0 +; CHECK-NEXT: ret + %v = load <16 x i8>, ptr %src + %res = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %v) + store <16 x i8> %res, ptr %dst + ret void +} + +define void @ctpop_v8i16(ptr %src, ptr %dst) nounwind { +; CHECK-LABEL: ctpop_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a0, 0 +; CHECK-NEXT: vpcnt.h $vr0, $vr0 +; CHECK-NEXT: vst $vr0, $a1, 0 +; CHECK-NEXT: ret + %v = load <8 x i16>, ptr %src + %res = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %v) + store <8 x i16> %res, ptr %dst + ret void +} + +define void @ctpop_v4i32(ptr %src, ptr %dst) nounwind { +; CHECK-LABEL: ctpop_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a0, 0 +; CHECK-NEXT: vpcnt.w $vr0, $vr0 +; CHECK-NEXT: vst $vr0, $a1, 0 +; CHECK-NEXT: ret + %v = load <4 x i32>, ptr %src + %res = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %v) + store <4 x i32> %res, ptr %dst + ret void +} + +define void @ctpop_v2i64(ptr %src, ptr %dst) nounwind { +; CHECK-LABEL: ctpop_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a0, 0 +; CHECK-NEXT: vpcnt.d $vr0, $vr0 +; CHECK-NEXT: vst $vr0, $a1, 0 +; CHECK-NEXT: ret + %v = load <2 x i64>, ptr %src + %res = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %v) + store <2 x i64> %res, ptr %dst + ret void +} + +define void @ctlz_v16i8(ptr %src, ptr %dst) nounwind { +; CHECK-LABEL: ctlz_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a0, 0 +; CHECK-NEXT: vclz.b $vr0, $vr0 +; CHECK-NEXT: vst $vr0, $a1, 0 +; CHECK-NEXT: ret + %v = load <16 x i8>, ptr %src + %res = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %v, i1 false) + store <16 x i8> %res, ptr %dst + ret void +} + +define void @ctlz_v8i16(ptr %src, ptr %dst) nounwind { +; CHECK-LABEL: ctlz_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a0, 0 +; CHECK-NEXT: vclz.h $vr0, $vr0 +; CHECK-NEXT: vst $vr0, $a1, 0 +; CHECK-NEXT: ret + %v = load <8 x i16>, ptr %src + %res = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %v, i1 false) + store <8 x i16> %res, ptr %dst + ret void +} + +define void @ctlz_v4i32(ptr %src, ptr %dst) nounwind { +; CHECK-LABEL: ctlz_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a0, 0 +; 
CHECK-NEXT: vclz.w $vr0, $vr0 +; CHECK-NEXT: vst $vr0, $a1, 0 +; CHECK-NEXT: ret + %v = load <4 x i32>, ptr %src + %res = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %v, i1 false) + store <4 x i32> %res, ptr %dst + ret void +} + +define void @ctlz_v2i64(ptr %src, ptr %dst) nounwind { +; CHECK-LABEL: ctlz_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a0, 0 +; CHECK-NEXT: vclz.d $vr0, $vr0 +; CHECK-NEXT: vst $vr0, $a1, 0 +; CHECK-NEXT: ret + %v = load <2 x i64>, ptr %src + %res = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %v, i1 false) + store <2 x i64> %res, ptr %dst + ret void +} + +declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8>) +declare <8 x i16> @llvm.ctpop.v8i16(<8 x i16>) +declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>) +declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>) +declare <16 x i8> @llvm.ctlz.v16i8(<16 x i8>, i1) +declare <8 x i16> @llvm.ctlz.v8i16(<8 x i16>, i1) +declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1) +declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>, i1)
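
The new fma-v2f64.ll below compiles one set of <2 x double> multiply-accumulate patterns three times, once per --fp-contract mode, and the CONTRACT-FAST/ON/OFF prefixes pin down which shapes may fuse into vfmadd.d, vfmsub.d, vfnmadd.d, and vfnmsub.d. The rule the checks encode, as a hedged sketch (function names illustrative, not part of the patch): a bare fmul+fadd pair fuses only under =fast, while a pair carrying the IR-level `contract` fast-math flag on both instructions is fusable in every mode.

; Fusable into vfmadd.d only when the module is compiled with
; --fp-contract=fast (cf. the vfmadd_d checks below).
define <2 x double> @plain_mul_add(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
  %m = fmul <2 x double> %a, %b
  %r = fadd <2 x double> %m, %c
  ret <2 x double> %r
}

; The `contract` flag on both operations licenses fusion in the IR itself,
; so vfmadd.d is expected under =fast, =on, and =off alike
; (cf. the contract_vfmadd_d checks below).
define <2 x double> @contract_mul_add(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
  %m = fmul contract <2 x double> %a, %b
  %r = fadd contract <2 x double> %m, %c
  ret <2 x double> %r
}

diff --git a/llvm/test/CodeGen/LoongArch/lsx/fma-v2f64.ll b/llvm/test/CodeGen/LoongArch/lsx/fma-v2f64.ll new file mode 100644 index 0000000000000000000000000000000000000000..8e0459b4afabef3499b852d623365360231ebbb8 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/fma-v2f64.ll @@ -0,0 +1,804 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx --fp-contract=fast < %s \ +; RUN: | FileCheck %s --check-prefix=CONTRACT-FAST +; RUN: llc --mtriple=loongarch64 --mattr=+lsx --fp-contract=on < %s \ +; RUN: | FileCheck %s --check-prefix=CONTRACT-ON +; RUN: llc --mtriple=loongarch64 --mattr=+lsx --fp-contract=off < %s \ +; RUN: | FileCheck %s --check-prefix=CONTRACT-OFF + +define void @vfmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: vfmadd_d: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 +; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 +; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 +; CONTRACT-FAST-NEXT: vfmadd.d $vr0, $vr2, $vr1, $vr0 +; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: vfmadd_d: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: vld $vr0, $a2, 0 +; CONTRACT-ON-NEXT: vld $vr1, $a1, 0 +; CONTRACT-ON-NEXT: vfmul.d $vr0, $vr1, $vr0 +; CONTRACT-ON-NEXT: vld $vr1, $a3, 0 +; CONTRACT-ON-NEXT: vfadd.d $vr0, $vr0, $vr1 +; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: vfmadd_d: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: vld $vr0, $a2, 0 +; CONTRACT-OFF-NEXT: vld $vr1, $a1, 0 +; CONTRACT-OFF-NEXT: vfmul.d $vr0, $vr1, $vr0 +; CONTRACT-OFF-NEXT: vld $vr1, $a3, 0 +; CONTRACT-OFF-NEXT: vfadd.d $vr0, $vr0, $vr1 +; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <2 x double>, ptr %a0 + %v1 = load <2 x double>, ptr %a1 + %v2 = load <2 x double>, ptr %a2 + %mul = fmul<2 x double> %v0, %v1 + %add = fadd<2 x double> %mul, %v2 + store <2 x double> %add, ptr %res + ret void +} + +define void @vfmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: vfmsub_d: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 +; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 +; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 +; CONTRACT-FAST-NEXT: vfmsub.d $vr0, $vr2, $vr1, $vr0 +; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: vfmsub_d: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: vld $vr0, $a2, 0 +; CONTRACT-ON-NEXT: vld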
$vr1, $a1, 0 +; CONTRACT-ON-NEXT: vfmul.d $vr0, $vr1, $vr0 +; CONTRACT-ON-NEXT: vld $vr1, $a3, 0 +; CONTRACT-ON-NEXT: vfsub.d $vr0, $vr0, $vr1 +; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: vfmsub_d: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: vld $vr0, $a2, 0 +; CONTRACT-OFF-NEXT: vld $vr1, $a1, 0 +; CONTRACT-OFF-NEXT: vfmul.d $vr0, $vr1, $vr0 +; CONTRACT-OFF-NEXT: vld $vr1, $a3, 0 +; CONTRACT-OFF-NEXT: vfsub.d $vr0, $vr0, $vr1 +; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <2 x double>, ptr %a0 + %v1 = load <2 x double>, ptr %a1 + %v2 = load <2 x double>, ptr %a2 + %mul = fmul<2 x double> %v0, %v1 + %sub = fsub<2 x double> %mul, %v2 + store <2 x double> %sub, ptr %res + ret void +} + +define void @vfnmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: vfnmadd_d: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 +; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 +; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 +; CONTRACT-FAST-NEXT: vfnmadd.d $vr0, $vr2, $vr1, $vr0 +; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: vfnmadd_d: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: vld $vr0, $a2, 0 +; CONTRACT-ON-NEXT: vld $vr1, $a1, 0 +; CONTRACT-ON-NEXT: vfmul.d $vr0, $vr1, $vr0 +; CONTRACT-ON-NEXT: vld $vr1, $a3, 0 +; CONTRACT-ON-NEXT: vfadd.d $vr0, $vr0, $vr1 +; CONTRACT-ON-NEXT: vbitrevi.d $vr0, $vr0, 63 +; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: vfnmadd_d: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: vld $vr0, $a2, 0 +; CONTRACT-OFF-NEXT: vld $vr1, $a1, 0 +; CONTRACT-OFF-NEXT: vfmul.d $vr0, $vr1, $vr0 +; CONTRACT-OFF-NEXT: vld $vr1, $a3, 0 +; CONTRACT-OFF-NEXT: vfadd.d $vr0, $vr0, $vr1 +; CONTRACT-OFF-NEXT: vbitrevi.d $vr0, $vr0, 63 +; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <2 x double>, ptr %a0 + %v1 = load <2 x double>, ptr %a1 + %v2 = load <2 x double>, ptr %a2 + %mul = fmul<2 x double> %v0, %v1 + %add = fadd<2 x double> %mul, %v2 + %negadd = fneg<2 x double> %add + store <2 x double> %negadd, ptr %res + ret void +} + +define void @vfnmadd_d_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: vfnmadd_d_nsz: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 +; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 +; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 +; CONTRACT-FAST-NEXT: vfnmadd.d $vr0, $vr2, $vr1, $vr0 +; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: vfnmadd_d_nsz: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: vld $vr0, $a2, 0 +; CONTRACT-ON-NEXT: vld $vr1, $a1, 0 +; CONTRACT-ON-NEXT: vbitrevi.d $vr1, $vr1, 63 +; CONTRACT-ON-NEXT: vfmul.d $vr0, $vr1, $vr0 +; CONTRACT-ON-NEXT: vld $vr1, $a3, 0 +; CONTRACT-ON-NEXT: vfsub.d $vr0, $vr0, $vr1 +; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: vfnmadd_d_nsz: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: vld $vr0, $a2, 0 +; CONTRACT-OFF-NEXT: vld $vr1, $a1, 0 +; CONTRACT-OFF-NEXT: vbitrevi.d $vr1, $vr1, 63 +; CONTRACT-OFF-NEXT: vfmul.d $vr0, $vr1, $vr0 +; CONTRACT-OFF-NEXT: vld $vr1, $a3, 0 +; CONTRACT-OFF-NEXT: vfsub.d $vr0, $vr0, $vr1 +; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <2 x double>, ptr %a0 + %v1 = load <2 x double>, ptr %a1 + %v2 = load <2 x double>, ptr %a2 + %negv0 = fneg nsz<2 x 
double> %v0 + %negv2 = fneg nsz<2 x double> %v2 + %mul = fmul nsz<2 x double> %negv0, %v1 + %add = fadd nsz<2 x double> %mul, %negv2 + store <2 x double> %add, ptr %res + ret void +} + +;; Check that vfnmadd.d is not emitted. +define void @not_vfnmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: not_vfnmadd_d: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 +; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 +; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 +; CONTRACT-FAST-NEXT: vbitrevi.d $vr2, $vr2, 63 +; CONTRACT-FAST-NEXT: vfmsub.d $vr0, $vr2, $vr1, $vr0 +; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: not_vfnmadd_d: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: vld $vr0, $a2, 0 +; CONTRACT-ON-NEXT: vld $vr1, $a1, 0 +; CONTRACT-ON-NEXT: vbitrevi.d $vr1, $vr1, 63 +; CONTRACT-ON-NEXT: vfmul.d $vr0, $vr1, $vr0 +; CONTRACT-ON-NEXT: vld $vr1, $a3, 0 +; CONTRACT-ON-NEXT: vfsub.d $vr0, $vr0, $vr1 +; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: not_vfnmadd_d: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: vld $vr0, $a2, 0 +; CONTRACT-OFF-NEXT: vld $vr1, $a1, 0 +; CONTRACT-OFF-NEXT: vbitrevi.d $vr1, $vr1, 63 +; CONTRACT-OFF-NEXT: vfmul.d $vr0, $vr1, $vr0 +; CONTRACT-OFF-NEXT: vld $vr1, $a3, 0 +; CONTRACT-OFF-NEXT: vfsub.d $vr0, $vr0, $vr1 +; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <2 x double>, ptr %a0 + %v1 = load <2 x double>, ptr %a1 + %v2 = load <2 x double>, ptr %a2 + %negv0 = fneg<2 x double> %v0 + %negv2 = fneg<2 x double> %v2 + %mul = fmul<2 x double> %negv0, %v1 + %add = fadd<2 x double> %mul, %negv2 + store <2 x double> %add, ptr %res + ret void +} + +define void @vfnmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: vfnmsub_d: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 +; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 +; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 +; CONTRACT-FAST-NEXT: vfnmsub.d $vr0, $vr2, $vr1, $vr0 +; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: vfnmsub_d: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: vld $vr0, $a2, 0 +; CONTRACT-ON-NEXT: vld $vr1, $a1, 0 +; CONTRACT-ON-NEXT: vfmul.d $vr0, $vr1, $vr0 +; CONTRACT-ON-NEXT: vld $vr1, $a3, 0 +; CONTRACT-ON-NEXT: vfsub.d $vr0, $vr0, $vr1 +; CONTRACT-ON-NEXT: vbitrevi.d $vr0, $vr0, 63 +; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: vfnmsub_d: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: vld $vr0, $a2, 0 +; CONTRACT-OFF-NEXT: vld $vr1, $a1, 0 +; CONTRACT-OFF-NEXT: vfmul.d $vr0, $vr1, $vr0 +; CONTRACT-OFF-NEXT: vld $vr1, $a3, 0 +; CONTRACT-OFF-NEXT: vfsub.d $vr0, $vr0, $vr1 +; CONTRACT-OFF-NEXT: vbitrevi.d $vr0, $vr0, 63 +; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <2 x double>, ptr %a0 + %v1 = load <2 x double>, ptr %a1 + %v2 = load <2 x double>, ptr %a2 + %negv2 = fneg<2 x double> %v2 + %mul = fmul<2 x double> %v0, %v1 + %add = fadd<2 x double> %mul, %negv2 + %neg = fneg<2 x double> %add + store <2 x double> %neg, ptr %res + ret void +} + +define void @vfnmsub_d_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: vfnmsub_d_nsz: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 +; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 +; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 +; CONTRACT-FAST-NEXT: vfnmsub.d $vr0, $vr2, 
$vr1, $vr0 +; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: vfnmsub_d_nsz: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: vld $vr0, $a2, 0 +; CONTRACT-ON-NEXT: vld $vr1, $a1, 0 +; CONTRACT-ON-NEXT: vfmul.d $vr0, $vr1, $vr0 +; CONTRACT-ON-NEXT: vld $vr1, $a3, 0 +; CONTRACT-ON-NEXT: vfsub.d $vr0, $vr1, $vr0 +; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: vfnmsub_d_nsz: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: vld $vr0, $a2, 0 +; CONTRACT-OFF-NEXT: vld $vr1, $a1, 0 +; CONTRACT-OFF-NEXT: vfmul.d $vr0, $vr1, $vr0 +; CONTRACT-OFF-NEXT: vld $vr1, $a3, 0 +; CONTRACT-OFF-NEXT: vfsub.d $vr0, $vr1, $vr0 +; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <2 x double>, ptr %a0 + %v1 = load <2 x double>, ptr %a1 + %v2 = load <2 x double>, ptr %a2 + %negv0 = fneg nsz<2 x double> %v0 + %mul = fmul nsz<2 x double> %negv0, %v1 + %add = fadd nsz<2 x double> %mul, %v2 + store <2 x double> %add, ptr %res + ret void +} + +;; Check that vfnmsub.d is not emitted. +define void @not_vfnmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: not_vfnmsub_d: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 +; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 +; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 +; CONTRACT-FAST-NEXT: vbitrevi.d $vr2, $vr2, 63 +; CONTRACT-FAST-NEXT: vfmadd.d $vr0, $vr2, $vr1, $vr0 +; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: not_vfnmsub_d: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: vld $vr0, $a2, 0 +; CONTRACT-ON-NEXT: vld $vr1, $a1, 0 +; CONTRACT-ON-NEXT: vfmul.d $vr0, $vr1, $vr0 +; CONTRACT-ON-NEXT: vld $vr1, $a3, 0 +; CONTRACT-ON-NEXT: vfsub.d $vr0, $vr1, $vr0 +; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: not_vfnmsub_d: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: vld $vr0, $a2, 0 +; CONTRACT-OFF-NEXT: vld $vr1, $a1, 0 +; CONTRACT-OFF-NEXT: vfmul.d $vr0, $vr1, $vr0 +; CONTRACT-OFF-NEXT: vld $vr1, $a3, 0 +; CONTRACT-OFF-NEXT: vfsub.d $vr0, $vr1, $vr0 +; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <2 x double>, ptr %a0 + %v1 = load <2 x double>, ptr %a1 + %v2 = load <2 x double>, ptr %a2 + %negv0 = fneg<2 x double> %v0 + %mul = fmul<2 x double> %negv0, %v1 + %add = fadd<2 x double> %mul, %v2 + store <2 x double> %add, ptr %res + ret void +} + +define void @contract_vfmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: contract_vfmadd_d: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 +; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 +; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 +; CONTRACT-FAST-NEXT: vfmadd.d $vr0, $vr2, $vr1, $vr0 +; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: contract_vfmadd_d: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 +; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 +; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 +; CONTRACT-ON-NEXT: vfmadd.d $vr0, $vr2, $vr1, $vr0 +; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: contract_vfmadd_d: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 +; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 +; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 +; CONTRACT-OFF-NEXT: vfmadd.d $vr0, $vr2, $vr1, $vr0 +; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <2 
x double>, ptr %a0 + %v1 = load <2 x double>, ptr %a1 + %v2 = load <2 x double>, ptr %a2 + %mul = fmul contract <2 x double> %v0, %v1 + %add = fadd contract <2 x double> %mul, %v2 + store <2 x double> %add, ptr %res + ret void +} + +define void @contract_vfmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: contract_vfmsub_d: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 +; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 +; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 +; CONTRACT-FAST-NEXT: vfmsub.d $vr0, $vr2, $vr1, $vr0 +; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: contract_vfmsub_d: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 +; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 +; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 +; CONTRACT-ON-NEXT: vfmsub.d $vr0, $vr2, $vr1, $vr0 +; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: contract_vfmsub_d: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 +; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 +; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 +; CONTRACT-OFF-NEXT: vfmsub.d $vr0, $vr2, $vr1, $vr0 +; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <2 x double>, ptr %a0 + %v1 = load <2 x double>, ptr %a1 + %v2 = load <2 x double>, ptr %a2 + %mul = fmul contract <2 x double> %v0, %v1 + %sub = fsub contract <2 x double> %mul, %v2 + store <2 x double> %sub, ptr %res + ret void +} + +define void @contract_vfnmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: contract_vfnmadd_d: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 +; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 +; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 +; CONTRACT-FAST-NEXT: vfnmadd.d $vr0, $vr2, $vr1, $vr0 +; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: contract_vfnmadd_d: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 +; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 +; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 +; CONTRACT-ON-NEXT: vfnmadd.d $vr0, $vr2, $vr1, $vr0 +; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: contract_vfnmadd_d: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 +; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 +; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 +; CONTRACT-OFF-NEXT: vfnmadd.d $vr0, $vr2, $vr1, $vr0 +; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <2 x double>, ptr %a0 + %v1 = load <2 x double>, ptr %a1 + %v2 = load <2 x double>, ptr %a2 + %mul = fmul contract <2 x double> %v0, %v1 + %add = fadd contract <2 x double> %mul, %v2 + %negadd = fneg contract <2 x double> %add + store <2 x double> %negadd, ptr %res + ret void +} + +define void @contract_vfnmadd_d_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: contract_vfnmadd_d_nsz: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 +; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 +; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 +; CONTRACT-FAST-NEXT: vfnmadd.d $vr0, $vr2, $vr1, $vr0 +; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: contract_vfnmadd_d_nsz: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 +; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 +; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 +; CONTRACT-ON-NEXT: vfnmadd.d $vr0, $vr2, $vr1, $vr0 +; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 +; 
CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: contract_vfnmadd_d_nsz: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 +; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 +; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 +; CONTRACT-OFF-NEXT: vfnmadd.d $vr0, $vr2, $vr1, $vr0 +; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <2 x double>, ptr %a0 + %v1 = load <2 x double>, ptr %a1 + %v2 = load <2 x double>, ptr %a2 + %negv0 = fneg contract nsz<2 x double> %v0 + %negv2 = fneg contract nsz<2 x double> %v2 + %mul = fmul contract nsz<2 x double> %negv0, %v1 + %add = fadd contract nsz<2 x double> %mul, %negv2 + store <2 x double> %add, ptr %res + ret void +} + +;; Check that vfnmadd.d is not emitted. +define void @not_contract_vfnmadd_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: not_contract_vfnmadd_d: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 +; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 +; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 +; CONTRACT-FAST-NEXT: vbitrevi.d $vr2, $vr2, 63 +; CONTRACT-FAST-NEXT: vfmsub.d $vr0, $vr2, $vr1, $vr0 +; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: not_contract_vfnmadd_d: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 +; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 +; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 +; CONTRACT-ON-NEXT: vbitrevi.d $vr2, $vr2, 63 +; CONTRACT-ON-NEXT: vfmsub.d $vr0, $vr2, $vr1, $vr0 +; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: not_contract_vfnmadd_d: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 +; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 +; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 +; CONTRACT-OFF-NEXT: vbitrevi.d $vr2, $vr2, 63 +; CONTRACT-OFF-NEXT: vfmsub.d $vr0, $vr2, $vr1, $vr0 +; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <2 x double>, ptr %a0 + %v1 = load <2 x double>, ptr %a1 + %v2 = load <2 x double>, ptr %a2 + %negv0 = fneg contract <2 x double> %v0 + %negv2 = fneg contract <2 x double> %v2 + %mul = fmul contract <2 x double> %negv0, %v1 + %add = fadd contract <2 x double> %mul, %negv2 + store <2 x double> %add, ptr %res + ret void +} + +define void @contract_vfnmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: contract_vfnmsub_d: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 +; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 +; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 +; CONTRACT-FAST-NEXT: vfnmsub.d $vr0, $vr2, $vr1, $vr0 +; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: contract_vfnmsub_d: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 +; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 +; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 +; CONTRACT-ON-NEXT: vfnmsub.d $vr0, $vr2, $vr1, $vr0 +; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: contract_vfnmsub_d: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 +; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 +; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 +; CONTRACT-OFF-NEXT: vfnmsub.d $vr0, $vr2, $vr1, $vr0 +; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <2 x double>, ptr %a0 + %v1 = load <2 x double>, ptr %a1 + %v2 = load <2 x double>, ptr %a2 + %negv2 = fneg contract <2 x double> %v2 + %mul = fmul contract <2 x double> %v0, %v1 + %add = fadd contract <2 x double> %mul, 
%negv2 + %neg = fneg contract <2 x double> %add + store <2 x double> %neg, ptr %res + ret void +} + +define void @contract_vfnmsub_d_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: contract_vfnmsub_d_nsz: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 +; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 +; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 +; CONTRACT-FAST-NEXT: vfnmsub.d $vr0, $vr2, $vr1, $vr0 +; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: contract_vfnmsub_d_nsz: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 +; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 +; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 +; CONTRACT-ON-NEXT: vfnmsub.d $vr0, $vr2, $vr1, $vr0 +; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: contract_vfnmsub_d_nsz: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 +; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 +; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 +; CONTRACT-OFF-NEXT: vfnmsub.d $vr0, $vr2, $vr1, $vr0 +; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <2 x double>, ptr %a0 + %v1 = load <2 x double>, ptr %a1 + %v2 = load <2 x double>, ptr %a2 + %negv0 = fneg contract nsz<2 x double> %v0 + %mul = fmul contract nsz<2 x double> %negv0, %v1 + %add = fadd contract nsz<2 x double> %mul, %v2 + store <2 x double> %add, ptr %res + ret void +} + +;; Check that vfnmsub.d is not emitted. +define void @not_contract_vfnmsub_d(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: not_contract_vfnmsub_d: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 +; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 +; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 +; CONTRACT-FAST-NEXT: vbitrevi.d $vr2, $vr2, 63 +; CONTRACT-FAST-NEXT: vfmadd.d $vr0, $vr2, $vr1, $vr0 +; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: not_contract_vfnmsub_d: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 +; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 +; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 +; CONTRACT-ON-NEXT: vbitrevi.d $vr2, $vr2, 63 +; CONTRACT-ON-NEXT: vfmadd.d $vr0, $vr2, $vr1, $vr0 +; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: not_contract_vfnmsub_d: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 +; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 +; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 +; CONTRACT-OFF-NEXT: vbitrevi.d $vr2, $vr2, 63 +; CONTRACT-OFF-NEXT: vfmadd.d $vr0, $vr2, $vr1, $vr0 +; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <2 x double>, ptr %a0 + %v1 = load <2 x double>, ptr %a1 + %v2 = load <2 x double>, ptr %a2 + %negv0 = fneg contract <2 x double> %v0 + %mul = fmul contract <2 x double> %negv0, %v1 + %add = fadd contract <2 x double> %mul, %v2 + store <2 x double> %add, ptr %res + ret void +} + +define void @vfmadd_d_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: vfmadd_d_contract: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 +; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 +; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 +; CONTRACT-FAST-NEXT: vfmadd.d $vr0, $vr2, $vr1, $vr0 +; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: vfmadd_d_contract: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 +; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 +; 
CONTRACT-ON-NEXT: vld $vr2, $a1, 0 +; CONTRACT-ON-NEXT: vfmadd.d $vr0, $vr2, $vr1, $vr0 +; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: vfmadd_d_contract: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 +; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 +; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 +; CONTRACT-OFF-NEXT: vfmadd.d $vr0, $vr2, $vr1, $vr0 +; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <2 x double>, ptr %a0 + %v1 = load <2 x double>, ptr %a1 + %v2 = load <2 x double>, ptr %a2 + %mul = fmul contract <2 x double> %v0, %v1 + %add = fadd contract <2 x double> %mul, %v2 + store <2 x double> %add, ptr %res + ret void +} + +define void @vfmsub_d_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: vfmsub_d_contract: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 +; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 +; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 +; CONTRACT-FAST-NEXT: vfmsub.d $vr0, $vr2, $vr1, $vr0 +; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: vfmsub_d_contract: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 +; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 +; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 +; CONTRACT-ON-NEXT: vfmsub.d $vr0, $vr2, $vr1, $vr0 +; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: vfmsub_d_contract: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 +; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 +; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 +; CONTRACT-OFF-NEXT: vfmsub.d $vr0, $vr2, $vr1, $vr0 +; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <2 x double>, ptr %a0 + %v1 = load <2 x double>, ptr %a1 + %v2 = load <2 x double>, ptr %a2 + %mul = fmul contract <2 x double> %v0, %v1 + %sub = fsub contract <2 x double> %mul, %v2 + store <2 x double> %sub, ptr %res + ret void +} + +define void @vfnmadd_d_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: vfnmadd_d_contract: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 +; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 +; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 +; CONTRACT-FAST-NEXT: vfnmadd.d $vr0, $vr2, $vr1, $vr0 +; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: vfnmadd_d_contract: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 +; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 +; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 +; CONTRACT-ON-NEXT: vfnmadd.d $vr0, $vr2, $vr1, $vr0 +; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: vfnmadd_d_contract: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 +; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 +; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 +; CONTRACT-OFF-NEXT: vfnmadd.d $vr0, $vr2, $vr1, $vr0 +; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <2 x double>, ptr %a0 + %v1 = load <2 x double>, ptr %a1 + %v2 = load <2 x double>, ptr %a2 + %mul = fmul contract <2 x double> %v0, %v1 + %add = fadd contract <2 x double> %mul, %v2 + %negadd = fneg contract <2 x double> %add + store <2 x double> %negadd, ptr %res + ret void +} + +define void @vfnmsub_d_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: vfnmsub_d_contract: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 +; 
CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 +; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 +; CONTRACT-FAST-NEXT: vfnmsub.d $vr0, $vr2, $vr1, $vr0 +; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: vfnmsub_d_contract: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 +; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 +; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 +; CONTRACT-ON-NEXT: vfnmsub.d $vr0, $vr2, $vr1, $vr0 +; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: vfnmsub_d_contract: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 +; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 +; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 +; CONTRACT-OFF-NEXT: vfnmsub.d $vr0, $vr2, $vr1, $vr0 +; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <2 x double>, ptr %a0 + %v1 = load <2 x double>, ptr %a1 + %v2 = load <2 x double>, ptr %a2 + %mul = fmul contract <2 x double> %v0, %v1 + %negv2 = fneg contract <2 x double> %v2 + %add = fadd contract <2 x double> %negv2, %mul + %negadd = fneg contract <2 x double> %add + store <2 x double> %negadd, ptr %res + ret void +}
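
Before the <4 x float> twin of the file above, one subtlety in the negated forms deserves a note: vfnmadd and vfnmsub produce a negated result (the tests encode -(a*b + c) and -(a*b - c) respectively), and the not_* functions check that shapes matching the fused form only up to the sign of zero are not fused unless the IR carries the nsz flag. For example, with a = 0.0, b = 0.0, c = -0.0, the unfused (-a)*b + (-c) evaluates to +0.0 while the fused -(a*b + c) gives -0.0. A hedged sketch of the nsz-gated shape (function name illustrative; fusion additionally requires contraction to be permitted, e.g. --fp-contract=fast):

; With nsz on every step, sign-of-zero differences may be ignored, so this
; shape is eligible to become a single vfnmadd.s (cf. vfnmadd_s_nsz below);
; drop the nsz flags and the backend must keep the explicit sign flips
; (cf. not_vfnmadd_s).
define <4 x float> @nsz_negated_mul_add(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
  %na = fneg nsz <4 x float> %a
  %nc = fneg nsz <4 x float> %c
  %m = fmul nsz <4 x float> %na, %b
  %r = fadd nsz <4 x float> %m, %nc
  ret <4 x float> %r
}

diff --git a/llvm/test/CodeGen/LoongArch/lsx/fma-v4f32.ll b/llvm/test/CodeGen/LoongArch/lsx/fma-v4f32.ll new file mode 100644 index 0000000000000000000000000000000000000000..7efbd61c0c4f7b5fac52ad297de76030b1835e6f --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/fma-v4f32.ll @@ -0,0 +1,804 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx --fp-contract=fast < %s \ +; RUN: | FileCheck %s --check-prefix=CONTRACT-FAST +; RUN: llc --mtriple=loongarch64 --mattr=+lsx --fp-contract=on < %s \ +; RUN: | FileCheck %s --check-prefix=CONTRACT-ON +; RUN: llc --mtriple=loongarch64 --mattr=+lsx --fp-contract=off < %s \ +; RUN: | FileCheck %s --check-prefix=CONTRACT-OFF + +define void @vfmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: vfmadd_s: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 +; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 +; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 +; CONTRACT-FAST-NEXT: vfmadd.s $vr0, $vr2, $vr1, $vr0 +; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: vfmadd_s: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: vld $vr0, $a2, 0 +; CONTRACT-ON-NEXT: vld $vr1, $a1, 0 +; CONTRACT-ON-NEXT: vfmul.s $vr0, $vr1, $vr0 +; CONTRACT-ON-NEXT: vld $vr1, $a3, 0 +; CONTRACT-ON-NEXT: vfadd.s $vr0, $vr0, $vr1 +; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: vfmadd_s: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: vld $vr0, $a2, 0 +; CONTRACT-OFF-NEXT: vld $vr1, $a1, 0 +; CONTRACT-OFF-NEXT: vfmul.s $vr0, $vr1, $vr0 +; CONTRACT-OFF-NEXT: vld $vr1, $a3, 0 +; CONTRACT-OFF-NEXT: vfadd.s $vr0, $vr0, $vr1 +; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <4 x float>, ptr %a0 + %v1 = load <4 x float>, ptr %a1 + %v2 = load <4 x float>, ptr %a2 + %mul = fmul<4 x float> %v0, %v1 + %add = fadd<4 x float> %mul, %v2 + store <4 x float> %add, ptr %res + ret void +} + +define void @vfmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: vfmsub_s: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 +; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 +; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 +; CONTRACT-FAST-NEXT: vfmsub.s $vr0, $vr2, $vr1, $vr0 +;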
CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: vfmsub_s: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: vld $vr0, $a2, 0 +; CONTRACT-ON-NEXT: vld $vr1, $a1, 0 +; CONTRACT-ON-NEXT: vfmul.s $vr0, $vr1, $vr0 +; CONTRACT-ON-NEXT: vld $vr1, $a3, 0 +; CONTRACT-ON-NEXT: vfsub.s $vr0, $vr0, $vr1 +; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: vfmsub_s: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: vld $vr0, $a2, 0 +; CONTRACT-OFF-NEXT: vld $vr1, $a1, 0 +; CONTRACT-OFF-NEXT: vfmul.s $vr0, $vr1, $vr0 +; CONTRACT-OFF-NEXT: vld $vr1, $a3, 0 +; CONTRACT-OFF-NEXT: vfsub.s $vr0, $vr0, $vr1 +; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <4 x float>, ptr %a0 + %v1 = load <4 x float>, ptr %a1 + %v2 = load <4 x float>, ptr %a2 + %mul = fmul<4 x float> %v0, %v1 + %sub = fsub<4 x float> %mul, %v2 + store <4 x float> %sub, ptr %res + ret void +} + +define void @vfnmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: vfnmadd_s: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 +; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 +; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 +; CONTRACT-FAST-NEXT: vfnmadd.s $vr0, $vr2, $vr1, $vr0 +; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: vfnmadd_s: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: vld $vr0, $a2, 0 +; CONTRACT-ON-NEXT: vld $vr1, $a1, 0 +; CONTRACT-ON-NEXT: vfmul.s $vr0, $vr1, $vr0 +; CONTRACT-ON-NEXT: vld $vr1, $a3, 0 +; CONTRACT-ON-NEXT: vfadd.s $vr0, $vr0, $vr1 +; CONTRACT-ON-NEXT: vbitrevi.w $vr0, $vr0, 31 +; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: vfnmadd_s: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: vld $vr0, $a2, 0 +; CONTRACT-OFF-NEXT: vld $vr1, $a1, 0 +; CONTRACT-OFF-NEXT: vfmul.s $vr0, $vr1, $vr0 +; CONTRACT-OFF-NEXT: vld $vr1, $a3, 0 +; CONTRACT-OFF-NEXT: vfadd.s $vr0, $vr0, $vr1 +; CONTRACT-OFF-NEXT: vbitrevi.w $vr0, $vr0, 31 +; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <4 x float>, ptr %a0 + %v1 = load <4 x float>, ptr %a1 + %v2 = load <4 x float>, ptr %a2 + %mul = fmul<4 x float> %v0, %v1 + %add = fadd<4 x float> %mul, %v2 + %negadd = fneg<4 x float> %add + store <4 x float> %negadd, ptr %res + ret void +} + +define void @vfnmadd_s_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: vfnmadd_s_nsz: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 +; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 +; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 +; CONTRACT-FAST-NEXT: vfnmadd.s $vr0, $vr2, $vr1, $vr0 +; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: vfnmadd_s_nsz: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: vld $vr0, $a2, 0 +; CONTRACT-ON-NEXT: vld $vr1, $a1, 0 +; CONTRACT-ON-NEXT: vbitrevi.w $vr1, $vr1, 31 +; CONTRACT-ON-NEXT: vfmul.s $vr0, $vr1, $vr0 +; CONTRACT-ON-NEXT: vld $vr1, $a3, 0 +; CONTRACT-ON-NEXT: vfsub.s $vr0, $vr0, $vr1 +; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: vfnmadd_s_nsz: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: vld $vr0, $a2, 0 +; CONTRACT-OFF-NEXT: vld $vr1, $a1, 0 +; CONTRACT-OFF-NEXT: vbitrevi.w $vr1, $vr1, 31 +; CONTRACT-OFF-NEXT: vfmul.s $vr0, $vr1, $vr0 +; CONTRACT-OFF-NEXT: vld $vr1, $a3, 0 +; CONTRACT-OFF-NEXT: vfsub.s $vr0, $vr0, $vr1 +; CONTRACT-OFF-NEXT: 
vst $vr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <4 x float>, ptr %a0 + %v1 = load <4 x float>, ptr %a1 + %v2 = load <4 x float>, ptr %a2 + %negv0 = fneg nsz<4 x float> %v0 + %negv2 = fneg nsz<4 x float> %v2 + %mul = fmul nsz<4 x float> %negv0, %v1 + %add = fadd nsz<4 x float> %mul, %negv2 + store <4 x float> %add, ptr %res + ret void +} + +;; Check that vfnmadd.s is not emitted. +define void @not_vfnmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: not_vfnmadd_s: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 +; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 +; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 +; CONTRACT-FAST-NEXT: vbitrevi.w $vr2, $vr2, 31 +; CONTRACT-FAST-NEXT: vfmsub.s $vr0, $vr2, $vr1, $vr0 +; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: not_vfnmadd_s: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: vld $vr0, $a2, 0 +; CONTRACT-ON-NEXT: vld $vr1, $a1, 0 +; CONTRACT-ON-NEXT: vbitrevi.w $vr1, $vr1, 31 +; CONTRACT-ON-NEXT: vfmul.s $vr0, $vr1, $vr0 +; CONTRACT-ON-NEXT: vld $vr1, $a3, 0 +; CONTRACT-ON-NEXT: vfsub.s $vr0, $vr0, $vr1 +; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: not_vfnmadd_s: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: vld $vr0, $a2, 0 +; CONTRACT-OFF-NEXT: vld $vr1, $a1, 0 +; CONTRACT-OFF-NEXT: vbitrevi.w $vr1, $vr1, 31 +; CONTRACT-OFF-NEXT: vfmul.s $vr0, $vr1, $vr0 +; CONTRACT-OFF-NEXT: vld $vr1, $a3, 0 +; CONTRACT-OFF-NEXT: vfsub.s $vr0, $vr0, $vr1 +; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <4 x float>, ptr %a0 + %v1 = load <4 x float>, ptr %a1 + %v2 = load <4 x float>, ptr %a2 + %negv0 = fneg<4 x float> %v0 + %negv2 = fneg<4 x float> %v2 + %mul = fmul<4 x float> %negv0, %v1 + %add = fadd<4 x float> %mul, %negv2 + store <4 x float> %add, ptr %res + ret void +} + +define void @vfnmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: vfnmsub_s: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 +; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 +; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 +; CONTRACT-FAST-NEXT: vfnmsub.s $vr0, $vr2, $vr1, $vr0 +; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: vfnmsub_s: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: vld $vr0, $a2, 0 +; CONTRACT-ON-NEXT: vld $vr1, $a1, 0 +; CONTRACT-ON-NEXT: vfmul.s $vr0, $vr1, $vr0 +; CONTRACT-ON-NEXT: vld $vr1, $a3, 0 +; CONTRACT-ON-NEXT: vfsub.s $vr0, $vr0, $vr1 +; CONTRACT-ON-NEXT: vbitrevi.w $vr0, $vr0, 31 +; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: vfnmsub_s: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: vld $vr0, $a2, 0 +; CONTRACT-OFF-NEXT: vld $vr1, $a1, 0 +; CONTRACT-OFF-NEXT: vfmul.s $vr0, $vr1, $vr0 +; CONTRACT-OFF-NEXT: vld $vr1, $a3, 0 +; CONTRACT-OFF-NEXT: vfsub.s $vr0, $vr0, $vr1 +; CONTRACT-OFF-NEXT: vbitrevi.w $vr0, $vr0, 31 +; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <4 x float>, ptr %a0 + %v1 = load <4 x float>, ptr %a1 + %v2 = load <4 x float>, ptr %a2 + %negv2 = fneg<4 x float> %v2 + %mul = fmul<4 x float> %v0, %v1 + %add = fadd<4 x float> %mul, %negv2 + %neg = fneg<4 x float> %add + store <4 x float> %neg, ptr %res + ret void +} + +define void @vfnmsub_s_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: vfnmsub_s_nsz: +; CONTRACT-FAST: # %bb.0: # %entry +; 
CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 +; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 +; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 +; CONTRACT-FAST-NEXT: vfnmsub.s $vr0, $vr2, $vr1, $vr0 +; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: vfnmsub_s_nsz: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: vld $vr0, $a2, 0 +; CONTRACT-ON-NEXT: vld $vr1, $a1, 0 +; CONTRACT-ON-NEXT: vfmul.s $vr0, $vr1, $vr0 +; CONTRACT-ON-NEXT: vld $vr1, $a3, 0 +; CONTRACT-ON-NEXT: vfsub.s $vr0, $vr1, $vr0 +; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: vfnmsub_s_nsz: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: vld $vr0, $a2, 0 +; CONTRACT-OFF-NEXT: vld $vr1, $a1, 0 +; CONTRACT-OFF-NEXT: vfmul.s $vr0, $vr1, $vr0 +; CONTRACT-OFF-NEXT: vld $vr1, $a3, 0 +; CONTRACT-OFF-NEXT: vfsub.s $vr0, $vr1, $vr0 +; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <4 x float>, ptr %a0 + %v1 = load <4 x float>, ptr %a1 + %v2 = load <4 x float>, ptr %a2 + %negv0 = fneg nsz<4 x float> %v0 + %mul = fmul nsz<4 x float> %negv0, %v1 + %add = fadd nsz<4 x float> %mul, %v2 + store <4 x float> %add, ptr %res + ret void +} + +;; Check that vfnmsub.s is not emitted. +define void @not_vfnmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: not_vfnmsub_s: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 +; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 +; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 +; CONTRACT-FAST-NEXT: vbitrevi.w $vr2, $vr2, 31 +; CONTRACT-FAST-NEXT: vfmadd.s $vr0, $vr2, $vr1, $vr0 +; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: not_vfnmsub_s: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: vld $vr0, $a2, 0 +; CONTRACT-ON-NEXT: vld $vr1, $a1, 0 +; CONTRACT-ON-NEXT: vfmul.s $vr0, $vr1, $vr0 +; CONTRACT-ON-NEXT: vld $vr1, $a3, 0 +; CONTRACT-ON-NEXT: vfsub.s $vr0, $vr1, $vr0 +; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: not_vfnmsub_s: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: vld $vr0, $a2, 0 +; CONTRACT-OFF-NEXT: vld $vr1, $a1, 0 +; CONTRACT-OFF-NEXT: vfmul.s $vr0, $vr1, $vr0 +; CONTRACT-OFF-NEXT: vld $vr1, $a3, 0 +; CONTRACT-OFF-NEXT: vfsub.s $vr0, $vr1, $vr0 +; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <4 x float>, ptr %a0 + %v1 = load <4 x float>, ptr %a1 + %v2 = load <4 x float>, ptr %a2 + %negv0 = fneg<4 x float> %v0 + %mul = fmul<4 x float> %negv0, %v1 + %add = fadd<4 x float> %mul, %v2 + store <4 x float> %add, ptr %res + ret void +} + +define void @contract_vfmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: contract_vfmadd_s: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 +; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 +; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 +; CONTRACT-FAST-NEXT: vfmadd.s $vr0, $vr2, $vr1, $vr0 +; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: contract_vfmadd_s: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 +; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 +; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 +; CONTRACT-ON-NEXT: vfmadd.s $vr0, $vr2, $vr1, $vr0 +; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: contract_vfmadd_s: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 +; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 +; CONTRACT-OFF-NEXT: vld $vr2, 
$a1, 0 +; CONTRACT-OFF-NEXT: vfmadd.s $vr0, $vr2, $vr1, $vr0 +; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <4 x float>, ptr %a0 + %v1 = load <4 x float>, ptr %a1 + %v2 = load <4 x float>, ptr %a2 + %mul = fmul contract <4 x float> %v0, %v1 + %add = fadd contract <4 x float> %mul, %v2 + store <4 x float> %add, ptr %res + ret void +} + +define void @contract_vfmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: contract_vfmsub_s: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 +; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 +; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 +; CONTRACT-FAST-NEXT: vfmsub.s $vr0, $vr2, $vr1, $vr0 +; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: contract_vfmsub_s: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 +; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 +; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 +; CONTRACT-ON-NEXT: vfmsub.s $vr0, $vr2, $vr1, $vr0 +; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: contract_vfmsub_s: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 +; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 +; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 +; CONTRACT-OFF-NEXT: vfmsub.s $vr0, $vr2, $vr1, $vr0 +; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <4 x float>, ptr %a0 + %v1 = load <4 x float>, ptr %a1 + %v2 = load <4 x float>, ptr %a2 + %mul = fmul contract <4 x float> %v0, %v1 + %sub = fsub contract <4 x float> %mul, %v2 + store <4 x float> %sub, ptr %res + ret void +} + +define void @contract_vfnmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: contract_vfnmadd_s: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 +; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 +; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 +; CONTRACT-FAST-NEXT: vfnmadd.s $vr0, $vr2, $vr1, $vr0 +; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: contract_vfnmadd_s: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 +; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 +; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 +; CONTRACT-ON-NEXT: vfnmadd.s $vr0, $vr2, $vr1, $vr0 +; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: contract_vfnmadd_s: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 +; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 +; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 +; CONTRACT-OFF-NEXT: vfnmadd.s $vr0, $vr2, $vr1, $vr0 +; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <4 x float>, ptr %a0 + %v1 = load <4 x float>, ptr %a1 + %v2 = load <4 x float>, ptr %a2 + %mul = fmul contract <4 x float> %v0, %v1 + %add = fadd contract <4 x float> %mul, %v2 + %negadd = fneg contract <4 x float> %add + store <4 x float> %negadd, ptr %res + ret void +} + +define void @contract_vfnmadd_s_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: contract_vfnmadd_s_nsz: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 +; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 +; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 +; CONTRACT-FAST-NEXT: vfnmadd.s $vr0, $vr2, $vr1, $vr0 +; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: contract_vfnmadd_s_nsz: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 +; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 +; 
CONTRACT-ON-NEXT: vld $vr2, $a1, 0 +; CONTRACT-ON-NEXT: vfnmadd.s $vr0, $vr2, $vr1, $vr0 +; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: contract_vfnmadd_s_nsz: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 +; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 +; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 +; CONTRACT-OFF-NEXT: vfnmadd.s $vr0, $vr2, $vr1, $vr0 +; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <4 x float>, ptr %a0 + %v1 = load <4 x float>, ptr %a1 + %v2 = load <4 x float>, ptr %a2 + %negv0 = fneg contract nsz<4 x float> %v0 + %negv2 = fneg contract nsz<4 x float> %v2 + %mul = fmul contract nsz<4 x float> %negv0, %v1 + %add = fadd contract nsz<4 x float> %mul, %negv2 + store <4 x float> %add, ptr %res + ret void +} + +;; Check that vfnmadd.s is not emitted. +define void @not_contract_vfnmadd_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: not_contract_vfnmadd_s: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 +; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 +; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 +; CONTRACT-FAST-NEXT: vbitrevi.w $vr2, $vr2, 31 +; CONTRACT-FAST-NEXT: vfmsub.s $vr0, $vr2, $vr1, $vr0 +; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: not_contract_vfnmadd_s: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 +; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 +; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 +; CONTRACT-ON-NEXT: vbitrevi.w $vr2, $vr2, 31 +; CONTRACT-ON-NEXT: vfmsub.s $vr0, $vr2, $vr1, $vr0 +; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: not_contract_vfnmadd_s: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 +; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 +; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 +; CONTRACT-OFF-NEXT: vbitrevi.w $vr2, $vr2, 31 +; CONTRACT-OFF-NEXT: vfmsub.s $vr0, $vr2, $vr1, $vr0 +; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <4 x float>, ptr %a0 + %v1 = load <4 x float>, ptr %a1 + %v2 = load <4 x float>, ptr %a2 + %negv0 = fneg contract <4 x float> %v0 + %negv2 = fneg contract <4 x float> %v2 + %mul = fmul contract <4 x float> %negv0, %v1 + %add = fadd contract <4 x float> %mul, %negv2 + store <4 x float> %add, ptr %res + ret void +} + +define void @contract_vfnmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: contract_vfnmsub_s: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 +; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 +; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 +; CONTRACT-FAST-NEXT: vfnmsub.s $vr0, $vr2, $vr1, $vr0 +; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: contract_vfnmsub_s: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 +; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 +; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 +; CONTRACT-ON-NEXT: vfnmsub.s $vr0, $vr2, $vr1, $vr0 +; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: contract_vfnmsub_s: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 +; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 +; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 +; CONTRACT-OFF-NEXT: vfnmsub.s $vr0, $vr2, $vr1, $vr0 +; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <4 x float>, ptr %a0 + %v1 = load <4 x float>, ptr %a1 + %v2 = load <4 x float>, ptr %a2 + %negv2 = fneg 
contract <4 x float> %v2 + %mul = fmul contract <4 x float> %v0, %v1 + %add = fadd contract <4 x float> %mul, %negv2 + %neg = fneg contract <4 x float> %add + store <4 x float> %neg, ptr %res + ret void +} + +define void @contract_vfnmsub_s_nsz(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: contract_vfnmsub_s_nsz: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 +; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 +; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 +; CONTRACT-FAST-NEXT: vfnmsub.s $vr0, $vr2, $vr1, $vr0 +; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: contract_vfnmsub_s_nsz: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 +; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 +; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 +; CONTRACT-ON-NEXT: vfnmsub.s $vr0, $vr2, $vr1, $vr0 +; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: contract_vfnmsub_s_nsz: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 +; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 +; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 +; CONTRACT-OFF-NEXT: vfnmsub.s $vr0, $vr2, $vr1, $vr0 +; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <4 x float>, ptr %a0 + %v1 = load <4 x float>, ptr %a1 + %v2 = load <4 x float>, ptr %a2 + %negv0 = fneg contract nsz <4 x float> %v0 + %mul = fmul contract nsz <4 x float> %negv0, %v1 + %add = fadd contract nsz <4 x float> %mul, %v2 + store <4 x float> %add, ptr %res + ret void +} + +;; Check that vfnmsub.s is not emitted: without 'nsz' the fneg cannot be +;; folded into a negated multiply-sub, so a sign flip (vbitrevi.w) plus +;; vfmadd.s is emitted instead. +define void @not_contract_vfnmsub_s(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: not_contract_vfnmsub_s: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 +; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 +; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 +; CONTRACT-FAST-NEXT: vbitrevi.w $vr2, $vr2, 31 +; CONTRACT-FAST-NEXT: vfmadd.s $vr0, $vr2, $vr1, $vr0 +; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: not_contract_vfnmsub_s: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 +; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 +; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 +; CONTRACT-ON-NEXT: vbitrevi.w $vr2, $vr2, 31 +; CONTRACT-ON-NEXT: vfmadd.s $vr0, $vr2, $vr1, $vr0 +; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: not_contract_vfnmsub_s: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 +; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 +; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 +; CONTRACT-OFF-NEXT: vbitrevi.w $vr2, $vr2, 31 +; CONTRACT-OFF-NEXT: vfmadd.s $vr0, $vr2, $vr1, $vr0 +; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <4 x float>, ptr %a0 + %v1 = load <4 x float>, ptr %a1 + %v2 = load <4 x float>, ptr %a2 + %negv0 = fneg contract <4 x float> %v0 + %mul = fmul contract <4 x float> %negv0, %v1 + %add = fadd contract <4 x float> %mul, %v2 + store <4 x float> %add, ptr %res + ret void +} + +define void @vfmadd_s_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: vfmadd_s_contract: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 +; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 +; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 +; CONTRACT-FAST-NEXT: vfmadd.s $vr0, $vr2, $vr1, $vr0 +; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: vfmadd_s_contract: +; CONTRACT-ON: # %bb.0: #
%entry +; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 +; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 +; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 +; CONTRACT-ON-NEXT: vfmadd.s $vr0, $vr2, $vr1, $vr0 +; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: vfmadd_s_contract: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 +; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 +; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 +; CONTRACT-OFF-NEXT: vfmadd.s $vr0, $vr2, $vr1, $vr0 +; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <4 x float>, ptr %a0 + %v1 = load <4 x float>, ptr %a1 + %v2 = load <4 x float>, ptr %a2 + %mul = fmul contract <4 x float> %v0, %v1 + %add = fadd contract <4 x float> %mul, %v2 + store <4 x float> %add, ptr %res + ret void +} + +define void @vfmsub_s_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: vfmsub_s_contract: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 +; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 +; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 +; CONTRACT-FAST-NEXT: vfmsub.s $vr0, $vr2, $vr1, $vr0 +; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: vfmsub_s_contract: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 +; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 +; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 +; CONTRACT-ON-NEXT: vfmsub.s $vr0, $vr2, $vr1, $vr0 +; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: vfmsub_s_contract: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 +; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 +; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 +; CONTRACT-OFF-NEXT: vfmsub.s $vr0, $vr2, $vr1, $vr0 +; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <4 x float>, ptr %a0 + %v1 = load <4 x float>, ptr %a1 + %v2 = load <4 x float>, ptr %a2 + %mul = fmul contract <4 x float> %v0, %v1 + %sub = fsub contract <4 x float> %mul, %v2 + store <4 x float> %sub, ptr %res + ret void +} + +define void @vfnmadd_s_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: vfnmadd_s_contract: +; CONTRACT-FAST: # %bb.0: # %entry +; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 +; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 +; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 +; CONTRACT-FAST-NEXT: vfnmadd.s $vr0, $vr2, $vr1, $vr0 +; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: vfnmadd_s_contract: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 +; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 +; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 +; CONTRACT-ON-NEXT: vfnmadd.s $vr0, $vr2, $vr1, $vr0 +; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: vfnmadd_s_contract: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 +; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 +; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 +; CONTRACT-OFF-NEXT: vfnmadd.s $vr0, $vr2, $vr1, $vr0 +; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <4 x float>, ptr %a0 + %v1 = load <4 x float>, ptr %a1 + %v2 = load <4 x float>, ptr %a2 + %mul = fmul contract <4 x float> %v0, %v1 + %add = fadd contract <4 x float> %mul, %v2 + %negadd = fneg contract <4 x float> %add + store <4 x float> %negadd, ptr %res + ret void +} + +define void @vfnmsub_s_contract(ptr %res, ptr %a0, ptr %a1, ptr %a2) nounwind { +; CONTRACT-FAST-LABEL: vfnmsub_s_contract: +; CONTRACT-FAST: # 
%bb.0: # %entry +; CONTRACT-FAST-NEXT: vld $vr0, $a3, 0 +; CONTRACT-FAST-NEXT: vld $vr1, $a2, 0 +; CONTRACT-FAST-NEXT: vld $vr2, $a1, 0 +; CONTRACT-FAST-NEXT: vfnmsub.s $vr0, $vr2, $vr1, $vr0 +; CONTRACT-FAST-NEXT: vst $vr0, $a0, 0 +; CONTRACT-FAST-NEXT: ret +; +; CONTRACT-ON-LABEL: vfnmsub_s_contract: +; CONTRACT-ON: # %bb.0: # %entry +; CONTRACT-ON-NEXT: vld $vr0, $a3, 0 +; CONTRACT-ON-NEXT: vld $vr1, $a2, 0 +; CONTRACT-ON-NEXT: vld $vr2, $a1, 0 +; CONTRACT-ON-NEXT: vfnmsub.s $vr0, $vr2, $vr1, $vr0 +; CONTRACT-ON-NEXT: vst $vr0, $a0, 0 +; CONTRACT-ON-NEXT: ret +; +; CONTRACT-OFF-LABEL: vfnmsub_s_contract: +; CONTRACT-OFF: # %bb.0: # %entry +; CONTRACT-OFF-NEXT: vld $vr0, $a3, 0 +; CONTRACT-OFF-NEXT: vld $vr1, $a2, 0 +; CONTRACT-OFF-NEXT: vld $vr2, $a1, 0 +; CONTRACT-OFF-NEXT: vfnmsub.s $vr0, $vr2, $vr1, $vr0 +; CONTRACT-OFF-NEXT: vst $vr0, $a0, 0 +; CONTRACT-OFF-NEXT: ret +entry: + %v0 = load <4 x float>, ptr %a0 + %v1 = load <4 x float>, ptr %a1 + %v2 = load <4 x float>, ptr %a2 + %mul = fmul contract <4 x float> %v0, %v1 + %negv2 = fneg contract <4 x float> %v2 + %add = fadd contract <4 x float> %negv2, %mul + %negadd = fneg contract <4 x float> %add + store <4 x float> %negadd, ptr %res + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/fsqrt.ll b/llvm/test/CodeGen/LoongArch/lsx/fsqrt.ll new file mode 100644 index 0000000000000000000000000000000000000000..a57bc1ca0e94885dd0a1a7e94a7748f5b38ee796 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/fsqrt.ll @@ -0,0 +1,65 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +;; fsqrt +define void @sqrt_v4f32(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: sqrt_v4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vfsqrt.s $vr0, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <4 x float>, ptr %a0, align 16 + %sqrt = call <4 x float> @llvm.sqrt.v4f32 (<4 x float> %v0) + store <4 x float> %sqrt, ptr %res, align 16 + ret void +} + +define void @sqrt_v2f64(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: sqrt_v2f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vfsqrt.d $vr0, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <2 x double>, ptr %a0, align 16 + %sqrt = call <2 x double> @llvm.sqrt.v2f64 (<2 x double> %v0) + store <2 x double> %sqrt, ptr %res, align 16 + ret void +} + +;; 1.0 / (fsqrt vec) +define void @one_div_sqrt_v4f32(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: one_div_sqrt_v4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vfrsqrt.s $vr0, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <4 x float>, ptr %a0, align 16 + %sqrt = call <4 x float> @llvm.sqrt.v4f32 (<4 x float> %v0) + %div = fdiv <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %sqrt + store <4 x float> %div, ptr %res, align 16 + ret void +} + +define void @one_div_sqrt_v2f64(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: one_div_sqrt_v2f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vfrsqrt.d $vr0, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <2 x double>, ptr %a0, align 16 + %sqrt = call <2 x double> @llvm.sqrt.v2f64 (<2 x double> %v0) + %div = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, %sqrt + store <2 x double> %div, ptr %res, align 16 + ret void +} + +declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) +declare <2 x double> @llvm.sqrt.v2f64(<2 x double>) diff --git
a/llvm/test/CodeGen/LoongArch/lsx/inline-asm-operand-modifier.ll b/llvm/test/CodeGen/LoongArch/lsx/inline-asm-operand-modifier.ll new file mode 100644 index 0000000000000000000000000000000000000000..c46e624ddaa82a9ceed6f7895ee19252aef1b5b3 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/inline-asm-operand-modifier.ll @@ -0,0 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +define void @test_w() nounwind { +; CHECK-LABEL: test_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: vldi $vr0, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: ret +entry: + %0 = tail call <2 x i64> asm sideeffect "vldi ${0:w}, 1", "=f"() + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/inline-asm-reg-names.ll b/llvm/test/CodeGen/LoongArch/lsx/inline-asm-reg-names.ll new file mode 100644 index 0000000000000000000000000000000000000000..ceea3621be2f69a5474bc19f2b1164c90553dde1 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/inline-asm-reg-names.ll @@ -0,0 +1,58 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +define void @register_vr1() nounwind { +; CHECK-LABEL: register_vr1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: vldi $vr1, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: ret +entry: + %0 = tail call <2 x i64> asm sideeffect "vldi ${0:w}, 1", "={$vr1}"() + ret void +} + +define void @register_vr7() nounwind { +; CHECK-LABEL: register_vr7: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: vldi $vr7, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: ret +entry: + %0 = tail call <2 x i64> asm sideeffect "vldi ${0:w}, 1", "={$vr7}"() + ret void +} + +define void @register_vr23() nounwind { +; CHECK-LABEL: register_vr23: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: vldi $vr23, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: ret +entry: + %0 = tail call <2 x i64> asm sideeffect "vldi ${0:w}, 1", "={$vr23}"() + ret void +} + +;; The lower half of the vector register '$vr31' overlaps the +;; floating-point register '$f31' (ABI name '$fs7'), which is callee-saved +;; and must be preserved across calls. That is why the fst.d and fld.d +;; spill and reload instructions are emitted.
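+;; By contrast, '$vr1', '$vr7' and '$vr23' above overlap caller-saved +;; floating-point registers, so no spill or reload is emitted for them.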
+define void @register_vr31() nounwind { +; CHECK-LABEL: register_vr31: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi.d $sp, $sp, -16 +; CHECK-NEXT: fst.d $fs7, $sp, 8 # 8-byte Folded Spill +; CHECK-NEXT: #APP +; CHECK-NEXT: vldi $vr31, 1 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: fld.d $fs7, $sp, 8 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 16 +; CHECK-NEXT: ret +entry: + %0 = tail call <2 x i64> asm sideeffect "vldi ${0:w}, 1", "={$vr31}"() + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-absd.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-absd.ll new file mode 100644 index 0000000000000000000000000000000000000000..811d9d712de4e671544722d363bedc792d051969 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-absd.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vabsd.b(<16 x i8>, <16 x i8>) + +define <16 x i8> @lsx_vabsd_b(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vabsd_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vabsd.b $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vabsd.b(<16 x i8> %va, <16 x i8> %vb) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vabsd.h(<8 x i16>, <8 x i16>) + +define <8 x i16> @lsx_vabsd_h(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vabsd_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vabsd.h $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vabsd.h(<8 x i16> %va, <8 x i16> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vabsd.w(<4 x i32>, <4 x i32>) + +define <4 x i32> @lsx_vabsd_w(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vabsd_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vabsd.w $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vabsd.w(<4 x i32> %va, <4 x i32> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vabsd.d(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vabsd_d(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vabsd_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vabsd.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vabsd.d(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} + +declare <16 x i8> @llvm.loongarch.lsx.vabsd.bu(<16 x i8>, <16 x i8>) + +define <16 x i8> @lsx_vabsd_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vabsd_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vabsd.bu $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vabsd.bu(<16 x i8> %va, <16 x i8> %vb) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vabsd.hu(<8 x i16>, <8 x i16>) + +define <8 x i16> @lsx_vabsd_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vabsd_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vabsd.hu $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vabsd.hu(<8 x i16> %va, <8 x i16> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vabsd.wu(<4 x i32>, <4 x i32>) + +define <4 x i32> @lsx_vabsd_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vabsd_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vabsd.wu $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vabsd.wu(<4 x i32> %va, <4 x i32> %vb) + ret <4 x 
i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vabsd.du(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vabsd_du(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vabsd_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vabsd.du $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vabsd.du(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-add.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-add.ll new file mode 100644 index 0000000000000000000000000000000000000000..fac16c8308dafbb79a93d3c9ddfbe1e99994091c --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-add.ll @@ -0,0 +1,62 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vadd.b(<16 x i8>, <16 x i8>) + +define <16 x i8> @lsx_vadd_b(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vadd_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vadd.b $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vadd.b(<16 x i8> %va, <16 x i8> %vb) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vadd.h(<8 x i16>, <8 x i16>) + +define <8 x i16> @lsx_vadd_h(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vadd_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vadd.h $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vadd.h(<8 x i16> %va, <8 x i16> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vadd.w(<4 x i32>, <4 x i32>) + +define <4 x i32> @lsx_vadd_w(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vadd_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vadd.w $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vadd.w(<4 x i32> %va, <4 x i32> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vadd.d(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vadd_d(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vadd_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vadd.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vadd.d(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vadd.q(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vadd_q(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vadd_q: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vadd.q $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vadd.q(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-adda.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-adda.ll new file mode 100644 index 0000000000000000000000000000000000000000..79be0a184bfb18a5b22a40ad7db8f7f4390d9a3c --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-adda.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vadda.b(<16 x i8>, <16 x i8>) + +define <16 x i8> @lsx_vadda_b(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vadda_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vadda.b $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vadda.b(<16 x i8> %va, <16 x i8> %vb) + ret <16 x i8> %res +} + +declare <8 x i16> 
@llvm.loongarch.lsx.vadda.h(<8 x i16>, <8 x i16>) + +define <8 x i16> @lsx_vadda_h(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vadda_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vadda.h $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vadda.h(<8 x i16> %va, <8 x i16> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vadda.w(<4 x i32>, <4 x i32>) + +define <4 x i32> @lsx_vadda_w(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vadda_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vadda.w $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vadda.w(<4 x i32> %va, <4 x i32> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vadda.d(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vadda_d(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vadda_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vadda.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vadda.d(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-addi-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-addi-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..6875872b6f83b685b01df71aa218c70ba66afb21 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-addi-invalid-imm.ll @@ -0,0 +1,65 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vaddi.bu(<16 x i8>, i32) + +define <16 x i8> @lsx_vaddi_bu_lo(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vaddi.bu: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vaddi.bu(<16 x i8> %va, i32 -1) + ret <16 x i8> %res +} + +define <16 x i8> @lsx_vaddi_bu_hi(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vaddi.bu: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vaddi.bu(<16 x i8> %va, i32 32) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vaddi.hu(<8 x i16>, i32) + +define <8 x i16> @lsx_vaddi_hu_lo(<8 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vaddi.hu: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vaddi.hu(<8 x i16> %va, i32 -1) + ret <8 x i16> %res +} + +define <8 x i16> @lsx_vaddi_hu_hi(<8 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vaddi.hu: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vaddi.hu(<8 x i16> %va, i32 32) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vaddi.wu(<4 x i32>, i32) + +define <4 x i32> @lsx_vaddi_wu_lo(<4 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vaddi.wu: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vaddi.wu(<4 x i32> %va, i32 -1) + ret <4 x i32> %res +} + +define <4 x i32> @lsx_vaddi_wu_hi(<4 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vaddi.wu: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vaddi.wu(<4 x i32> %va, i32 32) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vaddi.du(<2 x i64>, i32) + +define <2 x i64> @lsx_vaddi_du_lo(<2 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vaddi.du: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vaddi.du(<2 x i64> %va, i32 -1) + ret <2 x i64> %res +} + +define <2 x i64> @lsx_vaddi_du_hi(<2 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vaddi.du: argument out of range +entry: + %res 
= call <2 x i64> @llvm.loongarch.lsx.vaddi.du(<2 x i64> %va, i32 32) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-addi-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-addi-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..87d32b3ce02a8ffeddb562c6fea4e33d5d435d48 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-addi-non-imm.ll @@ -0,0 +1,37 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vaddi.bu(<16 x i8>, i32) + +define <16 x i8> @lsx_vaddi_bu(<16 x i8> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vaddi.bu(<16 x i8> %va, i32 %b) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vaddi.hu(<8 x i16>, i32) + +define <8 x i16> @lsx_vaddi_hu(<8 x i16> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vaddi.hu(<8 x i16> %va, i32 %b) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vaddi.wu(<4 x i32>, i32) + +define <4 x i32> @lsx_vaddi_wu(<4 x i32> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vaddi.wu(<4 x i32> %va, i32 %b) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vaddi.du(<2 x i64>, i32) + +define <2 x i64> @lsx_vaddi_du(<2 x i64> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vaddi.du(<2 x i64> %va, i32 %b) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-addi.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-addi.ll new file mode 100644 index 0000000000000000000000000000000000000000..b9134e0724fe4c4079d037e60c7cf5ae2dc04548 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-addi.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vaddi.bu(<16 x i8>, i32) + +define <16 x i8> @lsx_vaddi_bu(<16 x i8> %va) nounwind { +; CHECK-LABEL: lsx_vaddi_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vaddi.bu $vr0, $vr0, 31 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vaddi.bu(<16 x i8> %va, i32 31) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vaddi.hu(<8 x i16>, i32) + +define <8 x i16> @lsx_vaddi_hu(<8 x i16> %va) nounwind { +; CHECK-LABEL: lsx_vaddi_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vaddi.hu $vr0, $vr0, 31 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vaddi.hu(<8 x i16> %va, i32 31) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vaddi.wu(<4 x i32>, i32) + +define <4 x i32> @lsx_vaddi_wu(<4 x i32> %va) nounwind { +; CHECK-LABEL: lsx_vaddi_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vaddi.wu $vr0, $vr0, 31 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vaddi.wu(<4 x i32> %va, i32 31) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vaddi.du(<2 x i64>, i32) + +define <2 x i64> @lsx_vaddi_du(<2 x i64> %va) nounwind { +; CHECK-LABEL: lsx_vaddi_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vaddi.du $vr0, $vr0, 31 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vaddi.du(<2 x i64> %va, i32 31) + ret <2 x i64> %res +} diff --git 
a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-addw.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-addw.ll new file mode 100644 index 0000000000000000000000000000000000000000..086e3bec12d2365b44bfb3cae2b45fa7933db946 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-addw.ll @@ -0,0 +1,290 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <8 x i16> @llvm.loongarch.lsx.vaddwev.h.b(<16 x i8>, <16 x i8>) + +define <8 x i16> @lsx_vaddwev_h_b(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vaddwev_h_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vaddwev.h.b $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.b(<16 x i8> %va, <16 x i8> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vaddwev.w.h(<8 x i16>, <8 x i16>) + +define <4 x i32> @lsx_vaddwev_w_h(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vaddwev_w_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vaddwev.w.h $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.h(<8 x i16> %va, <8 x i16> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vaddwev.d.w(<4 x i32>, <4 x i32>) + +define <2 x i64> @lsx_vaddwev_d_w(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vaddwev_d_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vaddwev.d.w $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.w(<4 x i32> %va, <4 x i32> %vb) + ret <2 x i64> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vaddwev.q.d(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vaddwev_q_d(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vaddwev_q_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vaddwev.q.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.d(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu(<16 x i8>, <16 x i8>) + +define <8 x i16> @lsx_vaddwev_h_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vaddwev_h_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vaddwev.h.bu $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu(<16 x i8> %va, <16 x i8> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu(<8 x i16>, <8 x i16>) + +define <4 x i32> @lsx_vaddwev_w_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vaddwev_w_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vaddwev.w.hu $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu(<8 x i16> %va, <8 x i16> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu(<4 x i32>, <4 x i32>) + +define <2 x i64> @lsx_vaddwev_d_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vaddwev_d_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vaddwev.d.wu $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu(<4 x i32> %va, <4 x i32> %vb) + ret <2 x i64> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vaddwev_q_du(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vaddwev_q_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vaddwev.q.du $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> 
@llvm.loongarch.lsx.vaddwev.q.du(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu.b(<16 x i8>, <16 x i8>) + +define <8 x i16> @lsx_vaddwev_h_bu_b(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vaddwev_h_bu_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vaddwev.h.bu.b $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu.b(<16 x i8> %va, <16 x i8> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu.h(<8 x i16>, <8 x i16>) + +define <4 x i32> @lsx_vaddwev_w_hu_h(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vaddwev_w_hu_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vaddwev.w.hu.h $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu.h(<8 x i16> %va, <8 x i16> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu.w(<4 x i32>, <4 x i32>) + +define <2 x i64> @lsx_vaddwev_d_wu_w(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vaddwev_d_wu_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vaddwev.d.wu.w $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu.w(<4 x i32> %va, <4 x i32> %vb) + ret <2 x i64> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du.d(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vaddwev_q_du_d(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vaddwev_q_du_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vaddwev.q.du.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du.d(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vaddwod.h.b(<16 x i8>, <16 x i8>) + +define <8 x i16> @lsx_vaddwod_h_b(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vaddwod_h_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vaddwod.h.b $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.b(<16 x i8> %va, <16 x i8> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vaddwod.w.h(<8 x i16>, <8 x i16>) + +define <4 x i32> @lsx_vaddwod_w_h(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vaddwod_w_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vaddwod.w.h $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.h(<8 x i16> %va, <8 x i16> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vaddwod.d.w(<4 x i32>, <4 x i32>) + +define <2 x i64> @lsx_vaddwod_d_w(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vaddwod_d_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vaddwod.d.w $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.w(<4 x i32> %va, <4 x i32> %vb) + ret <2 x i64> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vaddwod.q.d(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vaddwod_q_d(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vaddwod_q_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vaddwod.q.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.d(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu(<16 x i8>, <16 x i8>) + +define <8 x i16> @lsx_vaddwod_h_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vaddwod_h_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vaddwod.h.bu 
$vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu(<16 x i8> %va, <16 x i8> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu(<8 x i16>, <8 x i16>) + +define <4 x i32> @lsx_vaddwod_w_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vaddwod_w_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vaddwod.w.hu $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu(<8 x i16> %va, <8 x i16> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu(<4 x i32>, <4 x i32>) + +define <2 x i64> @lsx_vaddwod_d_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vaddwod_d_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vaddwod.d.wu $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu(<4 x i32> %va, <4 x i32> %vb) + ret <2 x i64> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vaddwod_q_du(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vaddwod_q_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vaddwod.q.du $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu.b(<16 x i8>, <16 x i8>) + +define <8 x i16> @lsx_vaddwod_h_bu_b(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vaddwod_h_bu_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vaddwod.h.bu.b $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu.b(<16 x i8> %va, <16 x i8> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu.h(<8 x i16>, <8 x i16>) + +define <4 x i32> @lsx_vaddwod_w_hu_h(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vaddwod_w_hu_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vaddwod.w.hu.h $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu.h(<8 x i16> %va, <8 x i16> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu.w(<4 x i32>, <4 x i32>) + +define <2 x i64> @lsx_vaddwod_d_wu_w(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vaddwod_d_wu_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vaddwod.d.wu.w $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu.w(<4 x i32> %va, <4 x i32> %vb) + ret <2 x i64> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du.d(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vaddwod_q_du_d(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vaddwod_q_du_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vaddwod.q.du.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du.d(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-and.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-and.ll new file mode 100644 index 0000000000000000000000000000000000000000..77496239c3a9f75eba70acbd910999ff1fba888f --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-and.ll @@ -0,0 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vand.v(<16 x i8>, <16 x i8>) + +define <16 x i8> 
@lsx_vand_v(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vand_v: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vand.v $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vand.v(<16 x i8> %va, <16 x i8> %vb) + ret <16 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-andi-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-andi-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..82a117b2aba577a11d8653b87807c0428806024d --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-andi-invalid-imm.ll @@ -0,0 +1,17 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vandi.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vandi_b_lo(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vandi.b: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vandi.b(<16 x i8> %va, i32 -1) + ret <16 x i8> %res +} + +define <16 x i8> @lsx_vandi_b_hi(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vandi.b: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vandi.b(<16 x i8> %va, i32 256) + ret <16 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-andi-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-andi-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..c0c35c775266d6270ea1e68d4c222d51e93f76fb --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-andi-non-imm.ll @@ -0,0 +1,10 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vandi.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vandi_b(<16 x i8> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vandi.b(<16 x i8> %va, i32 %b) + ret <16 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-andi.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-andi.ll new file mode 100644 index 0000000000000000000000000000000000000000..9a1c38a641d056a48f373aaba5431fd8f3608629 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-andi.ll @@ -0,0 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vandi.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vandi_b(<16 x i8> %va) nounwind { +; CHECK-LABEL: lsx_vandi_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vandi.b $vr0, $vr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vandi.b(<16 x i8> %va, i32 1) + ret <16 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-andn.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-andn.ll new file mode 100644 index 0000000000000000000000000000000000000000..b08c759ecc322bd5139a736d8aefe856a43629ed --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-andn.ll @@ -0,0 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vandn.v(<16 x i8>, <16 x i8>) + +define <16 x i8> @lsx_vandn_v(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vandn_v: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vandn.v $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vandn.v(<16 x i8> %va, <16 x i8> %vb) + ret <16 x 
i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-avg.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-avg.ll new file mode 100644 index 0000000000000000000000000000000000000000..fb0861f4cd5eec7b76dea2bfaa3e0b9aec09e1b9 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-avg.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vavg.b(<16 x i8>, <16 x i8>) + +define <16 x i8> @lsx_vavg_b(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vavg_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vavg.b $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vavg.b(<16 x i8> %va, <16 x i8> %vb) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vavg.h(<8 x i16>, <8 x i16>) + +define <8 x i16> @lsx_vavg_h(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vavg_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vavg.h $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vavg.h(<8 x i16> %va, <8 x i16> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vavg.w(<4 x i32>, <4 x i32>) + +define <4 x i32> @lsx_vavg_w(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vavg_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vavg.w $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vavg.w(<4 x i32> %va, <4 x i32> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vavg.d(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vavg_d(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vavg_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vavg.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vavg.d(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} + +declare <16 x i8> @llvm.loongarch.lsx.vavg.bu(<16 x i8>, <16 x i8>) + +define <16 x i8> @lsx_vavg_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vavg_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vavg.bu $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vavg.bu(<16 x i8> %va, <16 x i8> %vb) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vavg.hu(<8 x i16>, <8 x i16>) + +define <8 x i16> @lsx_vavg_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vavg_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vavg.hu $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vavg.hu(<8 x i16> %va, <8 x i16> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vavg.wu(<4 x i32>, <4 x i32>) + +define <4 x i32> @lsx_vavg_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vavg_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vavg.wu $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vavg.wu(<4 x i32> %va, <4 x i32> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vavg.du(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vavg_du(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vavg_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vavg.du $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vavg.du(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-avgr.ll 
b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-avgr.ll new file mode 100644 index 0000000000000000000000000000000000000000..8bf7d0ed8817325232cd0def475d996a77ad64ad --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-avgr.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vavgr.b(<16 x i8>, <16 x i8>) + +define <16 x i8> @lsx_vavgr_b(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vavgr_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vavgr.b $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vavgr.b(<16 x i8> %va, <16 x i8> %vb) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vavgr.h(<8 x i16>, <8 x i16>) + +define <8 x i16> @lsx_vavgr_h(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vavgr_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vavgr.h $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vavgr.h(<8 x i16> %va, <8 x i16> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vavgr.w(<4 x i32>, <4 x i32>) + +define <4 x i32> @lsx_vavgr_w(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vavgr_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vavgr.w $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vavgr.w(<4 x i32> %va, <4 x i32> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vavgr.d(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vavgr_d(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vavgr_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vavgr.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vavgr.d(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} + +declare <16 x i8> @llvm.loongarch.lsx.vavgr.bu(<16 x i8>, <16 x i8>) + +define <16 x i8> @lsx_vavgr_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vavgr_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vavgr.bu $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vavgr.bu(<16 x i8> %va, <16 x i8> %vb) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vavgr.hu(<8 x i16>, <8 x i16>) + +define <8 x i16> @lsx_vavgr_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vavgr_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vavgr.hu $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vavgr.hu(<8 x i16> %va, <8 x i16> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vavgr.wu(<4 x i32>, <4 x i32>) + +define <4 x i32> @lsx_vavgr_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vavgr_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vavgr.wu $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vavgr.wu(<4 x i32> %va, <4 x i32> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vavgr.du(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vavgr_du(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vavgr_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vavgr.du $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vavgr.du(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitclr-invalid-imm.ll 
b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitclr-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..b020806cd86cbdb309dd76e05233be7d1b89b6dc --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitclr-invalid-imm.ll @@ -0,0 +1,65 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vbitclri.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vbitclri_b_lo(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vbitclri.b: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vbitclri.b(<16 x i8> %va, i32 -1) + ret <16 x i8> %res +} + +define <16 x i8> @lsx_vbitclri_b_hi(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vbitclri.b: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vbitclri.b(<16 x i8> %va, i32 8) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vbitclri.h(<8 x i16>, i32) + +define <8 x i16> @lsx_vbitclri_h_lo(<8 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vbitclri.h: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vbitclri.h(<8 x i16> %va, i32 -1) + ret <8 x i16> %res +} + +define <8 x i16> @lsx_vbitclri_h_hi(<8 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vbitclri.h: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vbitclri.h(<8 x i16> %va, i32 16) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vbitclri.w(<4 x i32>, i32) + +define <4 x i32> @lsx_vbitclri_w_lo(<4 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vbitclri.w: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vbitclri.w(<4 x i32> %va, i32 -1) + ret <4 x i32> %res +} + +define <4 x i32> @lsx_vbitclri_w_hi(<4 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vbitclri.w: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vbitclri.w(<4 x i32> %va, i32 32) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vbitclri.d(<2 x i64>, i32) + +define <2 x i64> @lsx_vbitclri_d_lo(<2 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vbitclri.d: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vbitclri.d(<2 x i64> %va, i32 -1) + ret <2 x i64> %res +} + +define <2 x i64> @lsx_vbitclri_d_hi(<2 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vbitclri.d: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vbitclri.d(<2 x i64> %va, i32 64) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitclr-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitclr-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..df6cdb99cdbcb94d48747026694c152e683fc390 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitclr-non-imm.ll @@ -0,0 +1,37 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vbitclri.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vbitclri_b(<16 x i8> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vbitclri.b(<16 x i8> %va, i32 %b) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vbitclri.h(<8 x i16>, i32) + +define <8 x i16> @lsx_vbitclri_h(<8 x i16> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vbitclri.h(<8 x i16> %va, i32 %b) + ret <8 x i16> %res +} + 
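+;; The immediate operand of these intrinsics is declared 'immarg'; passing a +;; run-time value such as '%b' is therefore rejected, as the CHECK lines verify.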
+declare <4 x i32> @llvm.loongarch.lsx.vbitclri.w(<4 x i32>, i32) + +define <4 x i32> @lsx_vbitclri_w(<4 x i32> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vbitclri.w(<4 x i32> %va, i32 %b) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vbitclri.d(<2 x i64>, i32) + +define <2 x i64> @lsx_vbitclri_d(<2 x i64> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vbitclri.d(<2 x i64> %va, i32 %b) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitclr.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitclr.ll new file mode 100644 index 0000000000000000000000000000000000000000..f5fba6dbb1414376ff22db9127ac71b111082b25 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitclr.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vbitclr.b(<16 x i8>, <16 x i8>) + +define <16 x i8> @lsx_vbitclr_b(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vbitclr_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vbitclr.b $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vbitclr.b(<16 x i8> %va, <16 x i8> %vb) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vbitclr.h(<8 x i16>, <8 x i16>) + +define <8 x i16> @lsx_vbitclr_h(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vbitclr_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vbitclr.h $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vbitclr.h(<8 x i16> %va, <8 x i16> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vbitclr.w(<4 x i32>, <4 x i32>) + +define <4 x i32> @lsx_vbitclr_w(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vbitclr_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vbitclr.w $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vbitclr.w(<4 x i32> %va, <4 x i32> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vbitclr.d(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vbitclr_d(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vbitclr_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vbitclr.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vbitclr.d(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} + +declare <16 x i8> @llvm.loongarch.lsx.vbitclri.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vbitclri_b(<16 x i8> %va) nounwind { +; CHECK-LABEL: lsx_vbitclri_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vbitclri.b $vr0, $vr0, 7 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vbitclri.b(<16 x i8> %va, i32 7) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vbitclri.h(<8 x i16>, i32) + +define <8 x i16> @lsx_vbitclri_h(<8 x i16> %va) nounwind { +; CHECK-LABEL: lsx_vbitclri_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vbitclri.h $vr0, $vr0, 15 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vbitclri.h(<8 x i16> %va, i32 15) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vbitclri.w(<4 x i32>, i32) + +define <4 x i32> @lsx_vbitclri_w(<4 x i32> %va) nounwind { +; CHECK-LABEL: lsx_vbitclri_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vbitclri.w $vr0, $vr0, 31 
+; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vbitclri.w(<4 x i32> %va, i32 31) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vbitclri.d(<2 x i64>, i32) + +define <2 x i64> @lsx_vbitclri_d(<2 x i64> %va) nounwind { +; CHECK-LABEL: lsx_vbitclri_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vbitclri.d $vr0, $vr0, 63 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vbitclri.d(<2 x i64> %va, i32 63) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitrev-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitrev-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..24b6ec3284cb8d9a07013a6dd43f9eaab57ca540 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitrev-invalid-imm.ll @@ -0,0 +1,65 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vbitrevi.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vbitrevi_b_lo(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vbitrevi.b: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vbitrevi.b(<16 x i8> %va, i32 -1) + ret <16 x i8> %res +} + +define <16 x i8> @lsx_vbitrevi_b_hi(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vbitrevi.b: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vbitrevi.b(<16 x i8> %va, i32 8) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vbitrevi.h(<8 x i16>, i32) + +define <8 x i16> @lsx_vbitrevi_h_lo(<8 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vbitrevi.h: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vbitrevi.h(<8 x i16> %va, i32 -1) + ret <8 x i16> %res +} + +define <8 x i16> @lsx_vbitrevi_h_hi(<8 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vbitrevi.h: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vbitrevi.h(<8 x i16> %va, i32 16) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vbitrevi.w(<4 x i32>, i32) + +define <4 x i32> @lsx_vbitrevi_w_lo(<4 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vbitrevi.w: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vbitrevi.w(<4 x i32> %va, i32 -1) + ret <4 x i32> %res +} + +define <4 x i32> @lsx_vbitrevi_w_hi(<4 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vbitrevi.w: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vbitrevi.w(<4 x i32> %va, i32 32) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vbitrevi.d(<2 x i64>, i32) + +define <2 x i64> @lsx_vbitrevi_d_lo(<2 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vbitrevi.d: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vbitrevi.d(<2 x i64> %va, i32 -1) + ret <2 x i64> %res +} + +define <2 x i64> @lsx_vbitrevi_d_hi(<2 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vbitrevi.d: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vbitrevi.d(<2 x i64> %va, i32 64) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitrev-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitrev-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..3ffb494c9907a71edfb0fc5b0fc13c53cd2ed0c0 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitrev-non-imm.ll @@ -0,0 +1,37 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8>
@llvm.loongarch.lsx.vbitrevi.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vbitrevi_b(<16 x i8> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vbitrevi.b(<16 x i8> %va, i32 %b) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vbitrevi.h(<8 x i16>, i32) + +define <8 x i16> @lsx_vbitrevi_h(<8 x i16> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vbitrevi.h(<8 x i16> %va, i32 %b) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vbitrevi.w(<4 x i32>, i32) + +define <4 x i32> @lsx_vbitrevi_w(<4 x i32> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vbitrevi.w(<4 x i32> %va, i32 %b) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vbitrevi.d(<2 x i64>, i32) + +define <2 x i64> @lsx_vbitrevi_d(<2 x i64> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vbitrevi.d(<2 x i64> %va, i32 %b) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitrev.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitrev.ll new file mode 100644 index 0000000000000000000000000000000000000000..ad56e88fdb8828ce60ab9d60092c052fc5d13020 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitrev.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vbitrev.b(<16 x i8>, <16 x i8>) + +define <16 x i8> @lsx_vbitrev_b(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vbitrev_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vbitrev.b $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vbitrev.b(<16 x i8> %va, <16 x i8> %vb) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vbitrev.h(<8 x i16>, <8 x i16>) + +define <8 x i16> @lsx_vbitrev_h(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vbitrev_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vbitrev.h $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vbitrev.h(<8 x i16> %va, <8 x i16> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vbitrev.w(<4 x i32>, <4 x i32>) + +define <4 x i32> @lsx_vbitrev_w(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vbitrev_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vbitrev.w $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vbitrev.w(<4 x i32> %va, <4 x i32> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vbitrev.d(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vbitrev_d(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vbitrev_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vbitrev.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vbitrev.d(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} + +declare <16 x i8> @llvm.loongarch.lsx.vbitrevi.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vbitrevi_b(<16 x i8> %va) nounwind { +; CHECK-LABEL: lsx_vbitrevi_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vbitrevi.b $vr0, $vr0, 7 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vbitrevi.b(<16 x i8> %va, i32 7) + ret <16 x i8> %res +} + 
+declare <8 x i16> @llvm.loongarch.lsx.vbitrevi.h(<8 x i16>, i32) + +define <8 x i16> @lsx_vbitrevi_h(<8 x i16> %va) nounwind { +; CHECK-LABEL: lsx_vbitrevi_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vbitrevi.h $vr0, $vr0, 15 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vbitrevi.h(<8 x i16> %va, i32 15) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vbitrevi.w(<4 x i32>, i32) + +define <4 x i32> @lsx_vbitrevi_w(<4 x i32> %va) nounwind { +; CHECK-LABEL: lsx_vbitrevi_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vbitrevi.w $vr0, $vr0, 31 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vbitrevi.w(<4 x i32> %va, i32 31) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vbitrevi.d(<2 x i64>, i32) + +define <2 x i64> @lsx_vbitrevi_d(<2 x i64> %va) nounwind { +; CHECK-LABEL: lsx_vbitrevi_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vbitrevi.d $vr0, $vr0, 63 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vbitrevi.d(<2 x i64> %va, i32 63) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitsel.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitsel.ll new file mode 100644 index 0000000000000000000000000000000000000000..4b4b5ff1fc8cdb501154835188ebdda5847786b5 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitsel.ll @@ -0,0 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vbitsel.v(<16 x i8>, <16 x i8>, <16 x i8>) + +define <16 x i8> @lsx_vbitsel_v(<16 x i8> %va, <16 x i8> %vb, <16 x i8> %vc) nounwind { +; CHECK-LABEL: lsx_vbitsel_v: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vbitsel.v $vr0, $vr0, $vr1, $vr2 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vbitsel.v(<16 x i8> %va, <16 x i8> %vb, <16 x i8> %vc) + ret <16 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitseli-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitseli-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..bc63b40e9fca76d607d46faee3aa3cbb5653d605 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitseli-invalid-imm.ll @@ -0,0 +1,17 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vbitseli.b(<16 x i8>, <16 x i8>, i32) + +define <16 x i8> @lsx_vbitseli_b_lo(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vbitseli.b: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vbitseli.b(<16 x i8> %va, <16 x i8> %vb, i32 -1) + ret <16 x i8> %res +} + +define <16 x i8> @lsx_vbitseli_b_hi(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vbitseli.b: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vbitseli.b(<16 x i8> %va, <16 x i8> %vb, i32 256) + ret <16 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitseli-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitseli-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..52c1eb7d202438f32454da2742faa3929f12fcbe --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitseli-non-imm.ll @@ -0,0 +1,10 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vbitseli.b(<16 x i8>, <16 x i8>, i32) + +define <16 x i8> 
@lsx_vbitseli_b(<16 x i8> %va, <16 x i8> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vbitseli.b(<16 x i8> %va, <16 x i8> %vb, i32 %c) + ret <16 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitseli.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitseli.ll new file mode 100644 index 0000000000000000000000000000000000000000..28d342b5c378fb85a23ab28fcfd5148a9663edc6 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitseli.ll @@ -0,0 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vbitseli.b(<16 x i8>, <16 x i8>, i32) + +define <16 x i8> @lsx_vbitseli_b(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vbitseli_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vbitseli.b $vr0, $vr1, 255 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vbitseli.b(<16 x i8> %va, <16 x i8> %vb, i32 255) + ret <16 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitset-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitset-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..e57e14d8cb077f596bd71fb7dabcef6f0c6c1dde --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitset-invalid-imm.ll @@ -0,0 +1,65 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vbitseti.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vbitseti_b_lo(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vbitseti.b: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vbitseti.b(<16 x i8> %va, i32 -1) + ret <16 x i8> %res +} + +define <16 x i8> @lsx_vbitseti_b_hi(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vbitseti.b: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vbitseti.b(<16 x i8> %va, i32 8) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vbitseti.h(<8 x i16>, i32) + +define <8 x i16> @lsx_vbitseti_h_lo(<8 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vbitseti.h: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vbitseti.h(<8 x i16> %va, i32 -1) + ret <8 x i16> %res +} + +define <8 x i16> @lsx_vbitseti_h_hi(<8 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vbitseti.h: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vbitseti.h(<8 x i16> %va, i32 16) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vbitseti.w(<4 x i32>, i32) + +define <4 x i32> @lsx_vbitseti_w_lo(<4 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vbitseti.w: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vbitseti.w(<4 x i32> %va, i32 -1) + ret <4 x i32> %res +} + +define <4 x i32> @lsx_vbitseti_w_hi(<4 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vbitseti.w: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vbitseti.w(<4 x i32> %va, i32 32) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vbitseti.d(<2 x i64>, i32) + +define <2 x i64> @lsx_vbitseti_d_lo(<2 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vbitseti.d: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vbitseti.d(<2 x i64> %va, i32 -1) + ret <2 x i64> %res +} + +define <2 x i64> @lsx_vbitseti_d_hi(<2 x i64> %va) nounwind { +; CHECK: 
llvm.loongarch.lsx.vbitseti.d: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vbitseti.d(<2 x i64> %va, i32 64) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitset-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitset-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..9b2bde015ed93f625ed06ebd6f4af05bf43dd09d --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitset-non-imm.ll @@ -0,0 +1,37 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vbitseti.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vbitseti_b(<16 x i8> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vbitseti.b(<16 x i8> %va, i32 %b) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vbitseti.h(<8 x i16>, i32) + +define <8 x i16> @lsx_vbitseti_h(<8 x i16> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vbitseti.h(<8 x i16> %va, i32 %b) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vbitseti.w(<4 x i32>, i32) + +define <4 x i32> @lsx_vbitseti_w(<4 x i32> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vbitseti.w(<4 x i32> %va, i32 %b) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vbitseti.d(<2 x i64>, i32) + +define <2 x i64> @lsx_vbitseti_d(<2 x i64> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vbitseti.d(<2 x i64> %va, i32 %b) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitset.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitset.ll new file mode 100644 index 0000000000000000000000000000000000000000..75d98e6f8bce1acea086cbd028b387599d264ff3 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bitset.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vbitset.b(<16 x i8>, <16 x i8>) + +define <16 x i8> @lsx_vbitset_b(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vbitset_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vbitset.b $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vbitset.b(<16 x i8> %va, <16 x i8> %vb) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vbitset.h(<8 x i16>, <8 x i16>) + +define <8 x i16> @lsx_vbitset_h(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vbitset_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vbitset.h $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vbitset.h(<8 x i16> %va, <8 x i16> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vbitset.w(<4 x i32>, <4 x i32>) + +define <4 x i32> @lsx_vbitset_w(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vbitset_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vbitset.w $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vbitset.w(<4 x i32> %va, <4 x i32> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vbitset.d(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vbitset_d(<2 x i64> %va, <2 x i64> %vb) nounwind { 
+; CHECK-LABEL: lsx_vbitset_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vbitset.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vbitset.d(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} + +declare <16 x i8> @llvm.loongarch.lsx.vbitseti.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vbitseti_b(<16 x i8> %va) nounwind { +; CHECK-LABEL: lsx_vbitseti_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vbitseti.b $vr0, $vr0, 7 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vbitseti.b(<16 x i8> %va, i32 7) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vbitseti.h(<8 x i16>, i32) + +define <8 x i16> @lsx_vbitseti_h(<8 x i16> %va) nounwind { +; CHECK-LABEL: lsx_vbitseti_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vbitseti.h $vr0, $vr0, 15 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vbitseti.h(<8 x i16> %va, i32 15) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vbitseti.w(<4 x i32>, i32) + +define <4 x i32> @lsx_vbitseti_w(<4 x i32> %va) nounwind { +; CHECK-LABEL: lsx_vbitseti_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vbitseti.w $vr0, $vr0, 31 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vbitseti.w(<4 x i32> %va, i32 31) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vbitseti.d(<2 x i64>, i32) + +define <2 x i64> @lsx_vbitseti_d(<2 x i64> %va) nounwind { +; CHECK-LABEL: lsx_vbitseti_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vbitseti.d $vr0, $vr0, 63 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vbitseti.d(<2 x i64> %va, i32 63) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsll-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsll-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..eb49af49c9bee2925f88be60e515f3bf5e968186 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsll-invalid-imm.ll @@ -0,0 +1,17 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vbsll.v(<16 x i8>, i32) + +define <16 x i8> @lsx_vbsll_v_lo(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vbsll.v: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vbsll.v(<16 x i8> %va, i32 -1) + ret <16 x i8> %res +} + +define <16 x i8> @lsx_vbsll_v_hi(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vbsll.v: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vbsll.v(<16 x i8> %va, i32 32) + ret <16 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsll-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsll-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..5b10c9e91a4f41ceb01911be0eb5b84068d97f1d --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsll-non-imm.ll @@ -0,0 +1,10 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vbsll.v(<16 x i8>, i32) + +define <16 x i8> @lsx_vbsll_v(<16 x i8> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vbsll.v(<16 x i8> %va, i32 %b) + ret <16 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsll.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsll.ll new file mode 100644 index 0000000000000000000000000000000000000000..e7eb1cfcb4074706cb044e9349e57503f9b1811b --- 
/dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsll.ll @@ -0,0 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vbsll.v(<16 x i8>, i32) + +define <16 x i8> @lsx_vbsll_v(<16 x i8> %va) nounwind { +; CHECK-LABEL: lsx_vbsll_v: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vbsll.v $vr0, $vr0, 31 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vbsll.v(<16 x i8> %va, i32 31) + ret <16 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsrl-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsrl-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..bf56822e2ef59d399e9b8725e20d3c11d697151b --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsrl-invalid-imm.ll @@ -0,0 +1,17 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vbsrl.v(<16 x i8>, i32) + +define <16 x i8> @lsx_vbsrl_v_lo(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vbsrl.v: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vbsrl.v(<16 x i8> %va, i32 -1) + ret <16 x i8> %res +} + +define <16 x i8> @lsx_vbsrl_v_hi(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vbsrl.v: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vbsrl.v(<16 x i8> %va, i32 32) + ret <16 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsrl-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsrl-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..0bc038c869ced502530018f7190e664507f28f8f --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsrl-non-imm.ll @@ -0,0 +1,10 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vbsrl.v(<16 x i8>, i32) + +define <16 x i8> @lsx_vbsrl_v(<16 x i8> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vbsrl.v(<16 x i8> %va, i32 %b) + ret <16 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsrl.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsrl.ll new file mode 100644 index 0000000000000000000000000000000000000000..fe0565297641bc3f859aec10ee0f3cb8d8f50a22 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-bsrl.ll @@ -0,0 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vbsrl.v(<16 x i8>, i32) + +define <16 x i8> @lsx_vbsrl_v(<16 x i8> %va) nounwind { +; CHECK-LABEL: lsx_vbsrl_v: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vbsrl.v $vr0, $vr0, 31 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vbsrl.v(<16 x i8> %va, i32 31) + ret <16 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-clo.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-clo.ll new file mode 100644 index 0000000000000000000000000000000000000000..c581109f3fd0b5f81645f8bbeea20bd10ff8548a --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-clo.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vclo.b(<16 x i8>) + +define 
<16 x i8> @lsx_vclo_b(<16 x i8> %va) nounwind { +; CHECK-LABEL: lsx_vclo_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vclo.b $vr0, $vr0 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vclo.b(<16 x i8> %va) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vclo.h(<8 x i16>) + +define <8 x i16> @lsx_vclo_h(<8 x i16> %va) nounwind { +; CHECK-LABEL: lsx_vclo_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vclo.h $vr0, $vr0 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vclo.h(<8 x i16> %va) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vclo.w(<4 x i32>) + +define <4 x i32> @lsx_vclo_w(<4 x i32> %va) nounwind { +; CHECK-LABEL: lsx_vclo_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vclo.w $vr0, $vr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vclo.w(<4 x i32> %va) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vclo.d(<2 x i64>) + +define <2 x i64> @lsx_vclo_d(<2 x i64> %va) nounwind { +; CHECK-LABEL: lsx_vclo_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vclo.d $vr0, $vr0 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vclo.d(<2 x i64> %va) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-clz.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-clz.ll new file mode 100644 index 0000000000000000000000000000000000000000..25c37b64349b35dd4304537074a6d89de68537e0 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-clz.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vclz.b(<16 x i8>) + +define <16 x i8> @lsx_vclz_b(<16 x i8> %va) nounwind { +; CHECK-LABEL: lsx_vclz_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vclz.b $vr0, $vr0 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vclz.b(<16 x i8> %va) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vclz.h(<8 x i16>) + +define <8 x i16> @lsx_vclz_h(<8 x i16> %va) nounwind { +; CHECK-LABEL: lsx_vclz_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vclz.h $vr0, $vr0 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vclz.h(<8 x i16> %va) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vclz.w(<4 x i32>) + +define <4 x i32> @lsx_vclz_w(<4 x i32> %va) nounwind { +; CHECK-LABEL: lsx_vclz_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vclz.w $vr0, $vr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vclz.w(<4 x i32> %va) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vclz.d(<2 x i64>) + +define <2 x i64> @lsx_vclz_d(<2 x i64> %va) nounwind { +; CHECK-LABEL: lsx_vclz_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vclz.d $vr0, $vr0 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vclz.d(<2 x i64> %va) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-div.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-div.ll new file mode 100644 index 0000000000000000000000000000000000000000..53166e84d269a3fb16f83d7fbff38251fa1972cd --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-div.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vdiv.b(<16 x i8>, <16 x i8>) + +define <16 x i8> @lsx_vdiv_b(<16 x i8> %va, <16 x i8> 
%vb) nounwind { +; CHECK-LABEL: lsx_vdiv_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vdiv.b $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vdiv.b(<16 x i8> %va, <16 x i8> %vb) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vdiv.h(<8 x i16>, <8 x i16>) + +define <8 x i16> @lsx_vdiv_h(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vdiv_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vdiv.h $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vdiv.h(<8 x i16> %va, <8 x i16> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vdiv.w(<4 x i32>, <4 x i32>) + +define <4 x i32> @lsx_vdiv_w(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vdiv_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vdiv.w $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vdiv.w(<4 x i32> %va, <4 x i32> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vdiv.d(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vdiv_d(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vdiv_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vdiv.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vdiv.d(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} + +declare <16 x i8> @llvm.loongarch.lsx.vdiv.bu(<16 x i8>, <16 x i8>) + +define <16 x i8> @lsx_vdiv_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vdiv_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vdiv.bu $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vdiv.bu(<16 x i8> %va, <16 x i8> %vb) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vdiv.hu(<8 x i16>, <8 x i16>) + +define <8 x i16> @lsx_vdiv_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vdiv_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vdiv.hu $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vdiv.hu(<8 x i16> %va, <8 x i16> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vdiv.wu(<4 x i32>, <4 x i32>) + +define <4 x i32> @lsx_vdiv_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vdiv_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vdiv.wu $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vdiv.wu(<4 x i32> %va, <4 x i32> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vdiv.du(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vdiv_du(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vdiv_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vdiv.du $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vdiv.du(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-exth.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-exth.ll new file mode 100644 index 0000000000000000000000000000000000000000..2f3e891a9eef2a68ca854463b615e2dd3e16f1a2 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-exth.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <8 x i16> @llvm.loongarch.lsx.vexth.h.b(<16 x i8>) + +define <8 x i16> @lsx_vexth_h_b(<16 x i8> %va) nounwind { +; CHECK-LABEL: lsx_vexth_h_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vexth.h.b $vr0, $vr0 +; CHECK-NEXT: ret 
+entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vexth.h.b(<16 x i8> %va) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vexth.w.h(<8 x i16>) + +define <4 x i32> @lsx_vexth_w_h(<8 x i16> %va) nounwind { +; CHECK-LABEL: lsx_vexth_w_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vexth.w.h $vr0, $vr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vexth.w.h(<8 x i16> %va) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vexth.d.w(<4 x i32>) + +define <2 x i64> @lsx_vexth_d_w(<4 x i32> %va) nounwind { +; CHECK-LABEL: lsx_vexth_d_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vexth.d.w $vr0, $vr0 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vexth.d.w(<4 x i32> %va) + ret <2 x i64> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vexth.q.d(<2 x i64>) + +define <2 x i64> @lsx_vexth_q_d(<2 x i64> %va) nounwind { +; CHECK-LABEL: lsx_vexth_q_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vexth.q.d $vr0, $vr0 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vexth.q.d(<2 x i64> %va) + ret <2 x i64> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vexth.hu.bu(<16 x i8>) + +define <8 x i16> @lsx_vexth_hu_bu(<16 x i8> %va) nounwind { +; CHECK-LABEL: lsx_vexth_hu_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vexth.hu.bu $vr0, $vr0 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vexth.hu.bu(<16 x i8> %va) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vexth.wu.hu(<8 x i16>) + +define <4 x i32> @lsx_vexth_wu_hu(<8 x i16> %va) nounwind { +; CHECK-LABEL: lsx_vexth_wu_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vexth.wu.hu $vr0, $vr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vexth.wu.hu(<8 x i16> %va) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vexth.du.wu(<4 x i32>) + +define <2 x i64> @lsx_vexth_du_wu(<4 x i32> %va) nounwind { +; CHECK-LABEL: lsx_vexth_du_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vexth.du.wu $vr0, $vr0 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vexth.du.wu(<4 x i32> %va) + ret <2 x i64> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vexth.qu.du(<2 x i64>) + +define <2 x i64> @lsx_vexth_qu_du(<2 x i64> %va) nounwind { +; CHECK-LABEL: lsx_vexth_qu_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vexth.qu.du $vr0, $vr0 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vexth.qu.du(<2 x i64> %va) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-extl.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-extl.ll new file mode 100644 index 0000000000000000000000000000000000000000..cbf19e2a391905d783233526650366fae4415301 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-extl.ll @@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <2 x i64> @llvm.loongarch.lsx.vextl.q.d(<2 x i64>) + +define <2 x i64> @lsx_vextl_q_d(<2 x i64> %va) nounwind { +; CHECK-LABEL: lsx_vextl_q_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vextl.q.d $vr0, $vr0 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vextl.q.d(<2 x i64> %va) + ret <2 x i64> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vextl.qu.du(<2 x i64>) + +define <2 x i64> @lsx_vextl_qu_du(<2 x i64> %va) nounwind { +; CHECK-LABEL: lsx_vextl_qu_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vextl.qu.du $vr0, $vr0 +; CHECK-NEXT: ret 
+entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vextl.qu.du(<2 x i64> %va) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-extrins-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-extrins-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..7f94234ed603b666171d41887c7c53c1dc658a2e --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-extrins-invalid-imm.ll @@ -0,0 +1,65 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vextrins.b(<16 x i8>, <16 x i8>, i32) + +define <16 x i8> @lsx_vextrins_b_lo(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vextrins.b: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vextrins.b(<16 x i8> %va, <16 x i8> %vb, i32 -1) + ret <16 x i8> %res +} + +define <16 x i8> @lsx_vextrins_b_hi(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vextrins.b: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vextrins.b(<16 x i8> %va, <16 x i8> %vb, i32 256) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vextrins.h(<8 x i16>, <8 x i16>, i32) + +define <8 x i16> @lsx_vextrins_h_lo(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vextrins.h: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vextrins.h(<8 x i16> %va, <8 x i16> %vb, i32 -1) + ret <8 x i16> %res +} + +define <8 x i16> @lsx_vextrins_h_hi(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vextrins.h: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vextrins.h(<8 x i16> %va, <8 x i16> %vb, i32 256) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vextrins.w(<4 x i32>, <4 x i32>, i32) + +define <4 x i32> @lsx_vextrins_w_lo(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vextrins.w: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vextrins.w(<4 x i32> %va, <4 x i32> %vb, i32 -1) + ret <4 x i32> %res +} + +define <4 x i32> @lsx_vextrins_w_hi(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vextrins.w: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vextrins.w(<4 x i32> %va, <4 x i32> %vb, i32 256) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vextrins.d(<2 x i64>, <2 x i64>, i32) + +define <2 x i64> @lsx_vextrins_d_lo(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vextrins.d: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vextrins.d(<2 x i64> %va, <2 x i64> %vb, i32 -1) + ret <2 x i64> %res +} + +define <2 x i64> @lsx_vextrins_d_hi(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vextrins.d: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vextrins.d(<2 x i64> %va, <2 x i64> %vb, i32 256) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-extrins-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-extrins-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..e834002bb60b8992bc05de1ac88b340a9a3c8ef7 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-extrins-non-imm.ll @@ -0,0 +1,37 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vextrins.b(<16 x i8>, <16 x i8>, i32) + +define <16 x i8> @lsx_vextrins_b(<16 x i8> %va, <16 
x i8> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vextrins.b(<16 x i8> %va, <16 x i8> %vb, i32 %c) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vextrins.h(<8 x i16>, <8 x i16>, i32) + +define <8 x i16> @lsx_vextrins_h(<8 x i16> %va, <8 x i16> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vextrins.h(<8 x i16> %va, <8 x i16> %vb, i32 %c) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vextrins.w(<4 x i32>, <4 x i32>, i32) + +define <4 x i32> @lsx_vextrins_w(<4 x i32> %va, <4 x i32> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vextrins.w(<4 x i32> %va, <4 x i32> %vb, i32 %c) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vextrins.d(<2 x i64>, <2 x i64>, i32) + +define <2 x i64> @lsx_vextrins_d(<2 x i64> %va, <2 x i64> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vextrins.d(<2 x i64> %va, <2 x i64> %vb, i32 %c) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-extrins.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-extrins.ll new file mode 100644 index 0000000000000000000000000000000000000000..8f03a2b812917ebb206821acd1efab7e283807c8 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-extrins.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vextrins.b(<16 x i8>, <16 x i8>, i32) + +define <16 x i8> @lsx_vextrins_b(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vextrins_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vextrins.b $vr0, $vr1, 255 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vextrins.b(<16 x i8> %va, <16 x i8> %vb, i32 255) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vextrins.h(<8 x i16>, <8 x i16>, i32) + +define <8 x i16> @lsx_vextrins_h(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vextrins_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vextrins.h $vr0, $vr1, 255 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vextrins.h(<8 x i16> %va, <8 x i16> %vb, i32 255) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vextrins.w(<4 x i32>, <4 x i32>, i32) + +define <4 x i32> @lsx_vextrins_w(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vextrins_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vextrins.w $vr0, $vr1, 255 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vextrins.w(<4 x i32> %va, <4 x i32> %vb, i32 255) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vextrins.d(<2 x i64>, <2 x i64>, i32) + +define <2 x i64> @lsx_vextrins_d(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vextrins_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vextrins.d $vr0, $vr1, 255 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vextrins.d(<2 x i64> %va, <2 x i64> %vb, i32 255) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fadd.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fadd.ll new file mode 100644 index 0000000000000000000000000000000000000000..569002314c92923c07b58ea2aca5c4a65d60b817 --- /dev/null 
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fadd.ll @@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <4 x float> @llvm.loongarch.lsx.vfadd.s(<4 x float>, <4 x float>) + +define <4 x float> @lsx_vfadd_s(<4 x float> %va, <4 x float> %vb) nounwind { +; CHECK-LABEL: lsx_vfadd_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfadd.s $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x float> @llvm.loongarch.lsx.vfadd.s(<4 x float> %va, <4 x float> %vb) + ret <4 x float> %res +} + +declare <2 x double> @llvm.loongarch.lsx.vfadd.d(<2 x double>, <2 x double>) + +define <2 x double> @lsx_vfadd_d(<2 x double> %va, <2 x double> %vb) nounwind { +; CHECK-LABEL: lsx_vfadd_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfadd.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x double> @llvm.loongarch.lsx.vfadd.d(<2 x double> %va, <2 x double> %vb) + ret <2 x double> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fclass.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fclass.ll new file mode 100644 index 0000000000000000000000000000000000000000..0c668218710174f07e9096826c074a48b78c68a5 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fclass.ll @@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <4 x i32> @llvm.loongarch.lsx.vfclass.s(<4 x float>) + +define <4 x i32> @lsx_vfclass_s(<4 x float> %va) nounwind { +; CHECK-LABEL: lsx_vfclass_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfclass.s $vr0, $vr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vfclass.s(<4 x float> %va) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vfclass.d(<2 x double>) + +define <2 x i64> @lsx_vfclass_d(<2 x double> %va) nounwind { +; CHECK-LABEL: lsx_vfclass_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfclass.d $vr0, $vr0 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vfclass.d(<2 x double> %va) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcmp.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcmp.ll new file mode 100644 index 0000000000000000000000000000000000000000..669c53b73b16fe49e6810be5e55745575b68ff71 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcmp.ll @@ -0,0 +1,530 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <4 x i32> @llvm.loongarch.lsx.vfcmp.caf.s(<4 x float>, <4 x float>) + +define <4 x i32> @lsx_vfcmp_caf_s(<4 x float> %va, <4 x float> %vb) nounwind { +; CHECK-LABEL: lsx_vfcmp_caf_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfcmp.caf.s $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.caf.s(<4 x float> %va, <4 x float> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vfcmp.caf.d(<2 x double>, <2 x double>) + +define <2 x i64> @lsx_vfcmp_caf_d(<2 x double> %va, <2 x double> %vb) nounwind { +; CHECK-LABEL: lsx_vfcmp_caf_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfcmp.caf.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.caf.d(<2 x double> %va, <2 x double> %vb) + ret <2 x i64> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vfcmp.cun.s(<4 x float>, <4 x float>) + +define <4 x i32> 
@lsx_vfcmp_cun_s(<4 x float> %va, <4 x float> %vb) nounwind { +; CHECK-LABEL: lsx_vfcmp_cun_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfcmp.cun.s $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.cun.s(<4 x float> %va, <4 x float> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vfcmp.cun.d(<2 x double>, <2 x double>) + +define <2 x i64> @lsx_vfcmp_cun_d(<2 x double> %va, <2 x double> %vb) nounwind { +; CHECK-LABEL: lsx_vfcmp_cun_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfcmp.cun.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.cun.d(<2 x double> %va, <2 x double> %vb) + ret <2 x i64> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vfcmp.ceq.s(<4 x float>, <4 x float>) + +define <4 x i32> @lsx_vfcmp_ceq_s(<4 x float> %va, <4 x float> %vb) nounwind { +; CHECK-LABEL: lsx_vfcmp_ceq_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfcmp.ceq.s $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.ceq.s(<4 x float> %va, <4 x float> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vfcmp.ceq.d(<2 x double>, <2 x double>) + +define <2 x i64> @lsx_vfcmp_ceq_d(<2 x double> %va, <2 x double> %vb) nounwind { +; CHECK-LABEL: lsx_vfcmp_ceq_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfcmp.ceq.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.ceq.d(<2 x double> %va, <2 x double> %vb) + ret <2 x i64> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vfcmp.cueq.s(<4 x float>, <4 x float>) + +define <4 x i32> @lsx_vfcmp_cueq_s(<4 x float> %va, <4 x float> %vb) nounwind { +; CHECK-LABEL: lsx_vfcmp_cueq_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfcmp.cueq.s $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.cueq.s(<4 x float> %va, <4 x float> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vfcmp.cueq.d(<2 x double>, <2 x double>) + +define <2 x i64> @lsx_vfcmp_cueq_d(<2 x double> %va, <2 x double> %vb) nounwind { +; CHECK-LABEL: lsx_vfcmp_cueq_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfcmp.cueq.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.cueq.d(<2 x double> %va, <2 x double> %vb) + ret <2 x i64> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vfcmp.clt.s(<4 x float>, <4 x float>) + +define <4 x i32> @lsx_vfcmp_clt_s(<4 x float> %va, <4 x float> %vb) nounwind { +; CHECK-LABEL: lsx_vfcmp_clt_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfcmp.clt.s $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.clt.s(<4 x float> %va, <4 x float> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vfcmp.clt.d(<2 x double>, <2 x double>) + +define <2 x i64> @lsx_vfcmp_clt_d(<2 x double> %va, <2 x double> %vb) nounwind { +; CHECK-LABEL: lsx_vfcmp_clt_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfcmp.clt.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.clt.d(<2 x double> %va, <2 x double> %vb) + ret <2 x i64> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vfcmp.cult.s(<4 x float>, <4 x float>) + +define <4 x i32> @lsx_vfcmp_cult_s(<4 x float> %va, <4 x float> %vb) nounwind { +; CHECK-LABEL: lsx_vfcmp_cult_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfcmp.cult.s $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.cult.s(<4 x 
float> %va, <4 x float> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vfcmp.cult.d(<2 x double>, <2 x double>) + +define <2 x i64> @lsx_vfcmp_cult_d(<2 x double> %va, <2 x double> %vb) nounwind { +; CHECK-LABEL: lsx_vfcmp_cult_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfcmp.cult.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.cult.d(<2 x double> %va, <2 x double> %vb) + ret <2 x i64> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vfcmp.cle.s(<4 x float>, <4 x float>) + +define <4 x i32> @lsx_vfcmp_cle_s(<4 x float> %va, <4 x float> %vb) nounwind { +; CHECK-LABEL: lsx_vfcmp_cle_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfcmp.cle.s $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.cle.s(<4 x float> %va, <4 x float> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vfcmp.cle.d(<2 x double>, <2 x double>) + +define <2 x i64> @lsx_vfcmp_cle_d(<2 x double> %va, <2 x double> %vb) nounwind { +; CHECK-LABEL: lsx_vfcmp_cle_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfcmp.cle.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.cle.d(<2 x double> %va, <2 x double> %vb) + ret <2 x i64> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vfcmp.cule.s(<4 x float>, <4 x float>) + +define <4 x i32> @lsx_vfcmp_cule_s(<4 x float> %va, <4 x float> %vb) nounwind { +; CHECK-LABEL: lsx_vfcmp_cule_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfcmp.cule.s $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.cule.s(<4 x float> %va, <4 x float> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vfcmp.cule.d(<2 x double>, <2 x double>) + +define <2 x i64> @lsx_vfcmp_cule_d(<2 x double> %va, <2 x double> %vb) nounwind { +; CHECK-LABEL: lsx_vfcmp_cule_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfcmp.cule.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.cule.d(<2 x double> %va, <2 x double> %vb) + ret <2 x i64> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vfcmp.cne.s(<4 x float>, <4 x float>) + +define <4 x i32> @lsx_vfcmp_cne_s(<4 x float> %va, <4 x float> %vb) nounwind { +; CHECK-LABEL: lsx_vfcmp_cne_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfcmp.cne.s $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.cne.s(<4 x float> %va, <4 x float> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vfcmp.cne.d(<2 x double>, <2 x double>) + +define <2 x i64> @lsx_vfcmp_cne_d(<2 x double> %va, <2 x double> %vb) nounwind { +; CHECK-LABEL: lsx_vfcmp_cne_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfcmp.cne.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.cne.d(<2 x double> %va, <2 x double> %vb) + ret <2 x i64> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vfcmp.cor.s(<4 x float>, <4 x float>) + +define <4 x i32> @lsx_vfcmp_cor_s(<4 x float> %va, <4 x float> %vb) nounwind { +; CHECK-LABEL: lsx_vfcmp_cor_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfcmp.cor.s $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.cor.s(<4 x float> %va, <4 x float> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vfcmp.cor.d(<2 x double>, <2 x double>) + +define <2 x i64> @lsx_vfcmp_cor_d(<2 x double> %va, <2 x double> %vb) nounwind { +; CHECK-LABEL: lsx_vfcmp_cor_d: +; 
CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfcmp.cor.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.cor.d(<2 x double> %va, <2 x double> %vb) + ret <2 x i64> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vfcmp.cune.s(<4 x float>, <4 x float>) + +define <4 x i32> @lsx_vfcmp_cune_s(<4 x float> %va, <4 x float> %vb) nounwind { +; CHECK-LABEL: lsx_vfcmp_cune_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfcmp.cune.s $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.cune.s(<4 x float> %va, <4 x float> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vfcmp.cune.d(<2 x double>, <2 x double>) + +define <2 x i64> @lsx_vfcmp_cune_d(<2 x double> %va, <2 x double> %vb) nounwind { +; CHECK-LABEL: lsx_vfcmp_cune_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfcmp.cune.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.cune.d(<2 x double> %va, <2 x double> %vb) + ret <2 x i64> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vfcmp.saf.s(<4 x float>, <4 x float>) + +define <4 x i32> @lsx_vfcmp_saf_s(<4 x float> %va, <4 x float> %vb) nounwind { +; CHECK-LABEL: lsx_vfcmp_saf_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfcmp.saf.s $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.saf.s(<4 x float> %va, <4 x float> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vfcmp.saf.d(<2 x double>, <2 x double>) + +define <2 x i64> @lsx_vfcmp_saf_d(<2 x double> %va, <2 x double> %vb) nounwind { +; CHECK-LABEL: lsx_vfcmp_saf_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfcmp.saf.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.saf.d(<2 x double> %va, <2 x double> %vb) + ret <2 x i64> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vfcmp.sun.s(<4 x float>, <4 x float>) + +define <4 x i32> @lsx_vfcmp_sun_s(<4 x float> %va, <4 x float> %vb) nounwind { +; CHECK-LABEL: lsx_vfcmp_sun_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfcmp.sun.s $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.sun.s(<4 x float> %va, <4 x float> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vfcmp.sun.d(<2 x double>, <2 x double>) + +define <2 x i64> @lsx_vfcmp_sun_d(<2 x double> %va, <2 x double> %vb) nounwind { +; CHECK-LABEL: lsx_vfcmp_sun_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfcmp.sun.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.sun.d(<2 x double> %va, <2 x double> %vb) + ret <2 x i64> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vfcmp.seq.s(<4 x float>, <4 x float>) + +define <4 x i32> @lsx_vfcmp_seq_s(<4 x float> %va, <4 x float> %vb) nounwind { +; CHECK-LABEL: lsx_vfcmp_seq_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfcmp.seq.s $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.seq.s(<4 x float> %va, <4 x float> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vfcmp.seq.d(<2 x double>, <2 x double>) + +define <2 x i64> @lsx_vfcmp_seq_d(<2 x double> %va, <2 x double> %vb) nounwind { +; CHECK-LABEL: lsx_vfcmp_seq_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfcmp.seq.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.seq.d(<2 x double> %va, <2 x double> %vb) + ret <2 x i64> %res +} + +declare <4 x i32> 
@llvm.loongarch.lsx.vfcmp.sueq.s(<4 x float>, <4 x float>) + +define <4 x i32> @lsx_vfcmp_sueq_s(<4 x float> %va, <4 x float> %vb) nounwind { +; CHECK-LABEL: lsx_vfcmp_sueq_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfcmp.sueq.s $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.sueq.s(<4 x float> %va, <4 x float> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vfcmp.sueq.d(<2 x double>, <2 x double>) + +define <2 x i64> @lsx_vfcmp_sueq_d(<2 x double> %va, <2 x double> %vb) nounwind { +; CHECK-LABEL: lsx_vfcmp_sueq_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfcmp.sueq.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.sueq.d(<2 x double> %va, <2 x double> %vb) + ret <2 x i64> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vfcmp.slt.s(<4 x float>, <4 x float>) + +define <4 x i32> @lsx_vfcmp_slt_s(<4 x float> %va, <4 x float> %vb) nounwind { +; CHECK-LABEL: lsx_vfcmp_slt_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfcmp.slt.s $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.slt.s(<4 x float> %va, <4 x float> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vfcmp.slt.d(<2 x double>, <2 x double>) + +define <2 x i64> @lsx_vfcmp_slt_d(<2 x double> %va, <2 x double> %vb) nounwind { +; CHECK-LABEL: lsx_vfcmp_slt_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfcmp.slt.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.slt.d(<2 x double> %va, <2 x double> %vb) + ret <2 x i64> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vfcmp.sult.s(<4 x float>, <4 x float>) + +define <4 x i32> @lsx_vfcmp_sult_s(<4 x float> %va, <4 x float> %vb) nounwind { +; CHECK-LABEL: lsx_vfcmp_sult_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfcmp.sult.s $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.sult.s(<4 x float> %va, <4 x float> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vfcmp.sult.d(<2 x double>, <2 x double>) + +define <2 x i64> @lsx_vfcmp_sult_d(<2 x double> %va, <2 x double> %vb) nounwind { +; CHECK-LABEL: lsx_vfcmp_sult_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfcmp.sult.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.sult.d(<2 x double> %va, <2 x double> %vb) + ret <2 x i64> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vfcmp.sle.s(<4 x float>, <4 x float>) + +define <4 x i32> @lsx_vfcmp_sle_s(<4 x float> %va, <4 x float> %vb) nounwind { +; CHECK-LABEL: lsx_vfcmp_sle_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfcmp.sle.s $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.sle.s(<4 x float> %va, <4 x float> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vfcmp.sle.d(<2 x double>, <2 x double>) + +define <2 x i64> @lsx_vfcmp_sle_d(<2 x double> %va, <2 x double> %vb) nounwind { +; CHECK-LABEL: lsx_vfcmp_sle_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfcmp.sle.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.sle.d(<2 x double> %va, <2 x double> %vb) + ret <2 x i64> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vfcmp.sule.s(<4 x float>, <4 x float>) + +define <4 x i32> @lsx_vfcmp_sule_s(<4 x float> %va, <4 x float> %vb) nounwind { +; CHECK-LABEL: lsx_vfcmp_sule_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vfcmp.sule.s $vr0, $vr0, $vr1 +; 
CHECK-NEXT: ret
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.sule.s(<4 x float> %va, <4 x float> %vb)
+  ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vfcmp.sule.d(<2 x double>, <2 x double>)
+
+define <2 x i64> @lsx_vfcmp_sule_d(<2 x double> %va, <2 x double> %vb) nounwind {
+; CHECK-LABEL: lsx_vfcmp_sule_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vfcmp.sule.d $vr0, $vr0, $vr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.sule.d(<2 x double> %va, <2 x double> %vb)
+  ret <2 x i64> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vfcmp.sne.s(<4 x float>, <4 x float>)
+
+define <4 x i32> @lsx_vfcmp_sne_s(<4 x float> %va, <4 x float> %vb) nounwind {
+; CHECK-LABEL: lsx_vfcmp_sne_s:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vfcmp.sne.s $vr0, $vr0, $vr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.sne.s(<4 x float> %va, <4 x float> %vb)
+  ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vfcmp.sne.d(<2 x double>, <2 x double>)
+
+define <2 x i64> @lsx_vfcmp_sne_d(<2 x double> %va, <2 x double> %vb) nounwind {
+; CHECK-LABEL: lsx_vfcmp_sne_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vfcmp.sne.d $vr0, $vr0, $vr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.sne.d(<2 x double> %va, <2 x double> %vb)
+  ret <2 x i64> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vfcmp.sor.s(<4 x float>, <4 x float>)
+
+define <4 x i32> @lsx_vfcmp_sor_s(<4 x float> %va, <4 x float> %vb) nounwind {
+; CHECK-LABEL: lsx_vfcmp_sor_s:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vfcmp.sor.s $vr0, $vr0, $vr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.sor.s(<4 x float> %va, <4 x float> %vb)
+  ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vfcmp.sor.d(<2 x double>, <2 x double>)
+
+define <2 x i64> @lsx_vfcmp_sor_d(<2 x double> %va, <2 x double> %vb) nounwind {
+; CHECK-LABEL: lsx_vfcmp_sor_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vfcmp.sor.d $vr0, $vr0, $vr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.sor.d(<2 x double> %va, <2 x double> %vb)
+  ret <2 x i64> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vfcmp.sune.s(<4 x float>, <4 x float>)
+
+define <4 x i32> @lsx_vfcmp_sune_s(<4 x float> %va, <4 x float> %vb) nounwind {
+; CHECK-LABEL: lsx_vfcmp_sune_s:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vfcmp.sune.s $vr0, $vr0, $vr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vfcmp.sune.s(<4 x float> %va, <4 x float> %vb)
+  ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vfcmp.sune.d(<2 x double>, <2 x double>)
+
+define <2 x i64> @lsx_vfcmp_sune_d(<2 x double> %va, <2 x double> %vb) nounwind {
+; CHECK-LABEL: lsx_vfcmp_sune_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vfcmp.sune.d $vr0, $vr0, $vr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vfcmp.sune.d(<2 x double> %va, <2 x double> %vb)
+  ret <2 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcvt.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcvt.ll
new file mode 100644
index 0000000000000000000000000000000000000000..a6a151a96d84e7f7496032c09fc313ab48b1e074
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcvt.ll
@@ -0,0 +1,26 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+declare <8 x i16> @llvm.loongarch.lsx.vfcvt.h.s(<4 x float>, <4 x float>)
+
+define <8 x i16> @lsx_vfcvt_h_s(<4 x float> %va, <4 x float> %vb) nounwind {
+; CHECK-LABEL: lsx_vfcvt_h_s:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vfcvt.h.s $vr0, $vr0, $vr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vfcvt.h.s(<4 x float> %va, <4 x float> %vb)
+  ret <8 x i16> %res
+}
+
+declare <4 x float> @llvm.loongarch.lsx.vfcvt.s.d(<2 x double>, <2 x double>)
+
+define <4 x float> @lsx_vfcvt_s_d(<2 x double> %va, <2 x double> %vb) nounwind {
+; CHECK-LABEL: lsx_vfcvt_s_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vfcvt.s.d $vr0, $vr0, $vr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x float> @llvm.loongarch.lsx.vfcvt.s.d(<2 x double> %va, <2 x double> %vb)
+  ret <4 x float> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcvth.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcvth.ll
new file mode 100644
index 0000000000000000000000000000000000000000..a9e4328bd011dbd7d9a06f2d1a7d26ecac816190
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcvth.ll
@@ -0,0 +1,26 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+declare <4 x float> @llvm.loongarch.lsx.vfcvth.s.h(<8 x i16>)
+
+define <4 x float> @lsx_vfcvth_s_h(<8 x i16> %va) nounwind {
+; CHECK-LABEL: lsx_vfcvth_s_h:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vfcvth.s.h $vr0, $vr0
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x float> @llvm.loongarch.lsx.vfcvth.s.h(<8 x i16> %va)
+  ret <4 x float> %res
+}
+
+declare <2 x double> @llvm.loongarch.lsx.vfcvth.d.s(<4 x float>)
+
+define <2 x double> @lsx_vfcvth_d_s(<4 x float> %va) nounwind {
+; CHECK-LABEL: lsx_vfcvth_d_s:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vfcvth.d.s $vr0, $vr0
+; CHECK-NEXT:    ret
+entry:
+  %res = call <2 x double> @llvm.loongarch.lsx.vfcvth.d.s(<4 x float> %va)
+  ret <2 x double> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcvtl.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcvtl.ll
new file mode 100644
index 0000000000000000000000000000000000000000..9a69964bb22741ffc32f00299dff78473caba8be
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fcvtl.ll
@@ -0,0 +1,26 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+declare <4 x float> @llvm.loongarch.lsx.vfcvtl.s.h(<8 x i16>)
+
+define <4 x float> @lsx_vfcvtl_s_h(<8 x i16> %va) nounwind {
+; CHECK-LABEL: lsx_vfcvtl_s_h:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vfcvtl.s.h $vr0, $vr0
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x float> @llvm.loongarch.lsx.vfcvtl.s.h(<8 x i16> %va)
+  ret <4 x float> %res
+}
+
+declare <2 x double> @llvm.loongarch.lsx.vfcvtl.d.s(<4 x float>)
+
+define <2 x double> @lsx_vfcvtl_d_s(<4 x float> %va) nounwind {
+; CHECK-LABEL: lsx_vfcvtl_d_s:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vfcvtl.d.s $vr0, $vr0
+; CHECK-NEXT:    ret
+entry:
+  %res = call <2 x double> @llvm.loongarch.lsx.vfcvtl.d.s(<4 x float> %va)
+  ret <2 x double> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fdiv.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fdiv.ll
new file mode 100644
index 0000000000000000000000000000000000000000..1ca8e5e2c0e9c435ba71ecd3d11360ad7dbd84db
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fdiv.ll
@@ -0,0 +1,26 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+declare <4 x float> @llvm.loongarch.lsx.vfdiv.s(<4 x float>, <4 x float>)
+
+define <4 x float> @lsx_vfdiv_s(<4 x float> %va, <4 x float> %vb) nounwind {
+; CHECK-LABEL: lsx_vfdiv_s:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vfdiv.s $vr0, $vr0, $vr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x float> @llvm.loongarch.lsx.vfdiv.s(<4 x float> %va, <4 x float> %vb)
+  ret <4 x float> %res
+}
+
+declare <2 x double> @llvm.loongarch.lsx.vfdiv.d(<2 x double>, <2 x double>)
+
+define <2 x double> @lsx_vfdiv_d(<2 x double> %va, <2 x double> %vb) nounwind {
+; CHECK-LABEL: lsx_vfdiv_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vfdiv.d $vr0, $vr0, $vr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <2 x double> @llvm.loongarch.lsx.vfdiv.d(<2 x double> %va, <2 x double> %vb)
+  ret <2 x double> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ffint.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ffint.ll
new file mode 100644
index 0000000000000000000000000000000000000000..62fbcfa339cda62d8e4b5dfb9f00e6f1b303f81a
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ffint.ll
@@ -0,0 +1,86 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+declare <4 x float> @llvm.loongarch.lsx.vffint.s.w(<4 x i32>)
+
+define <4 x float> @lsx_vffint_s_w(<4 x i32> %va) nounwind {
+; CHECK-LABEL: lsx_vffint_s_w:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vffint.s.w $vr0, $vr0
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x float> @llvm.loongarch.lsx.vffint.s.w(<4 x i32> %va)
+  ret <4 x float> %res
+}
+
+declare <2 x double> @llvm.loongarch.lsx.vffint.d.l(<2 x i64>)
+
+define <2 x double> @lsx_vffint_d_l(<2 x i64> %va) nounwind {
+; CHECK-LABEL: lsx_vffint_d_l:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vffint.d.l $vr0, $vr0
+; CHECK-NEXT:    ret
+entry:
+  %res = call <2 x double> @llvm.loongarch.lsx.vffint.d.l(<2 x i64> %va)
+  ret <2 x double> %res
+}
+
+declare <4 x float> @llvm.loongarch.lsx.vffint.s.wu(<4 x i32>)
+
+define <4 x float> @lsx_vffint_s_wu(<4 x i32> %va) nounwind {
+; CHECK-LABEL: lsx_vffint_s_wu:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vffint.s.wu $vr0, $vr0
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x float> @llvm.loongarch.lsx.vffint.s.wu(<4 x i32> %va)
+  ret <4 x float> %res
+}
+
+declare <2 x double> @llvm.loongarch.lsx.vffint.d.lu(<2 x i64>)
+
+define <2 x double> @lsx_vffint_d_lu(<2 x i64> %va) nounwind {
+; CHECK-LABEL: lsx_vffint_d_lu:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vffint.d.lu $vr0, $vr0
+; CHECK-NEXT:    ret
+entry:
+  %res = call <2 x double> @llvm.loongarch.lsx.vffint.d.lu(<2 x i64> %va)
+  ret <2 x double> %res
+}
+
+declare <2 x double> @llvm.loongarch.lsx.vffintl.d.w(<4 x i32>)
+
+define <2 x double> @lsx_vffintl_d_w(<4 x i32> %va) nounwind {
+; CHECK-LABEL: lsx_vffintl_d_w:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vffintl.d.w $vr0, $vr0
+; CHECK-NEXT:    ret
+entry:
+  %res = call <2 x double> @llvm.loongarch.lsx.vffintl.d.w(<4 x i32> %va)
+  ret <2 x double> %res
+}
+
+declare <2 x double> @llvm.loongarch.lsx.vffinth.d.w(<4 x i32>)
+
+define <2 x double> @lsx_vffinth_d_w(<4 x i32> %va) nounwind {
+; CHECK-LABEL: lsx_vffinth_d_w:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vffinth.d.w $vr0, $vr0
+; CHECK-NEXT:    ret
+entry:
+  %res = call <2 x double> @llvm.loongarch.lsx.vffinth.d.w(<4 x i32> %va)
+  ret <2 x double> %res
+}
+
+declare <4 x float> @llvm.loongarch.lsx.vffint.s.l(<2 x i64>, <2 x i64>)
+
+define <4 x float> @lsx_vffint_s_l(<2 x i64> %va, <2 x i64> %vb) nounwind {
+; CHECK-LABEL: lsx_vffint_s_l:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vffint.s.l $vr0, $vr0, $vr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x float> @llvm.loongarch.lsx.vffint.s.l(<2 x i64> %va, <2 x i64> %vb)
+  ret <4 x float> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-flogb.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-flogb.ll
new file mode 100644
index 0000000000000000000000000000000000000000..d8382acc70ed68a49c52b247295975788a5f7e1a
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-flogb.ll
@@ -0,0 +1,26 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+declare <4 x float> @llvm.loongarch.lsx.vflogb.s(<4 x float>)
+
+define <4 x float> @lsx_vflogb_s(<4 x float> %va) nounwind {
+; CHECK-LABEL: lsx_vflogb_s:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vflogb.s $vr0, $vr0
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x float> @llvm.loongarch.lsx.vflogb.s(<4 x float> %va)
+  ret <4 x float> %res
+}
+
+declare <2 x double> @llvm.loongarch.lsx.vflogb.d(<2 x double>)
+
+define <2 x double> @lsx_vflogb_d(<2 x double> %va) nounwind {
+; CHECK-LABEL: lsx_vflogb_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vflogb.d $vr0, $vr0
+; CHECK-NEXT:    ret
+entry:
+  %res = call <2 x double> @llvm.loongarch.lsx.vflogb.d(<2 x double> %va)
+  ret <2 x double> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmadd.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmadd.ll
new file mode 100644
index 0000000000000000000000000000000000000000..adbaf6c76b1b6a73919a02e8374b4160cb86f1ee
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmadd.ll
@@ -0,0 +1,26 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+declare <4 x float> @llvm.loongarch.lsx.vfmadd.s(<4 x float>, <4 x float>, <4 x float>)
+
+define <4 x float> @lsx_vfmadd_s(<4 x float> %va, <4 x float> %vb, <4 x float> %vc) nounwind {
+; CHECK-LABEL: lsx_vfmadd_s:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vfmadd.s $vr0, $vr0, $vr1, $vr2
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x float> @llvm.loongarch.lsx.vfmadd.s(<4 x float> %va, <4 x float> %vb, <4 x float> %vc)
+  ret <4 x float> %res
+}
+
+declare <2 x double> @llvm.loongarch.lsx.vfmadd.d(<2 x double>, <2 x double>, <2 x double>)
+
+define <2 x double> @lsx_vfmadd_d(<2 x double> %va, <2 x double> %vb, <2 x double> %vc) nounwind {
+; CHECK-LABEL: lsx_vfmadd_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vfmadd.d $vr0, $vr0, $vr1, $vr2
+; CHECK-NEXT:    ret
+entry:
+  %res = call <2 x double> @llvm.loongarch.lsx.vfmadd.d(<2 x double> %va, <2 x double> %vb, <2 x double> %vc)
+  ret <2 x double> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmax.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmax.ll
new file mode 100644
index 0000000000000000000000000000000000000000..89f757c4e456792f802e0e2d610149020c773763
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmax.ll
@@ -0,0 +1,26 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+declare <4 x float> @llvm.loongarch.lsx.vfmax.s(<4 x float>, <4 x float>)
+
+define <4 x float> @lsx_vfmax_s(<4 x float> %va, <4 x float> %vb) nounwind {
+; CHECK-LABEL: lsx_vfmax_s:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vfmax.s $vr0, $vr0, $vr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x float> @llvm.loongarch.lsx.vfmax.s(<4 x float> %va, <4 x float> %vb)
+  ret <4 x float> %res
+}
+
+declare <2 x double> @llvm.loongarch.lsx.vfmax.d(<2 x double>, <2 x double>)
+
+define <2 x double> @lsx_vfmax_d(<2 x double> %va, <2 x double> %vb) nounwind {
+; CHECK-LABEL: lsx_vfmax_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vfmax.d $vr0, $vr0, $vr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <2 x double> @llvm.loongarch.lsx.vfmax.d(<2 x double> %va, <2 x double> %vb)
+  ret <2 x double> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmaxa.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmaxa.ll
new file mode 100644
index 0000000000000000000000000000000000000000..5662acc0b9a143b7711b736d716aa8c12fa895f1
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmaxa.ll
@@ -0,0 +1,26 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+declare <4 x float> @llvm.loongarch.lsx.vfmaxa.s(<4 x float>, <4 x float>)
+
+define <4 x float> @lsx_vfmaxa_s(<4 x float> %va, <4 x float> %vb) nounwind {
+; CHECK-LABEL: lsx_vfmaxa_s:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vfmaxa.s $vr0, $vr0, $vr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x float> @llvm.loongarch.lsx.vfmaxa.s(<4 x float> %va, <4 x float> %vb)
+  ret <4 x float> %res
+}
+
+declare <2 x double> @llvm.loongarch.lsx.vfmaxa.d(<2 x double>, <2 x double>)
+
+define <2 x double> @lsx_vfmaxa_d(<2 x double> %va, <2 x double> %vb) nounwind {
+; CHECK-LABEL: lsx_vfmaxa_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vfmaxa.d $vr0, $vr0, $vr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <2 x double> @llvm.loongarch.lsx.vfmaxa.d(<2 x double> %va, <2 x double> %vb)
+  ret <2 x double> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmin.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmin.ll
new file mode 100644
index 0000000000000000000000000000000000000000..0f844240277fb0cf71216d5f26ac1240f35e0c53
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmin.ll
@@ -0,0 +1,26 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+declare <4 x float> @llvm.loongarch.lsx.vfmin.s(<4 x float>, <4 x float>)
+
+define <4 x float> @lsx_vfmin_s(<4 x float> %va, <4 x float> %vb) nounwind {
+; CHECK-LABEL: lsx_vfmin_s:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vfmin.s $vr0, $vr0, $vr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x float> @llvm.loongarch.lsx.vfmin.s(<4 x float> %va, <4 x float> %vb)
+  ret <4 x float> %res
+}
+
+declare <2 x double> @llvm.loongarch.lsx.vfmin.d(<2 x double>, <2 x double>)
+
+define <2 x double> @lsx_vfmin_d(<2 x double> %va, <2 x double> %vb) nounwind {
+; CHECK-LABEL: lsx_vfmin_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vfmin.d $vr0, $vr0, $vr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <2 x double> @llvm.loongarch.lsx.vfmin.d(<2 x double> %va, <2 x double> %vb)
+  ret <2 x double> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmina.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmina.ll
new file mode 100644
index 0000000000000000000000000000000000000000..27f70b5fba3229f65aca81aaee5f8dedfcf2ccde
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmina.ll
@@ -0,0 +1,26 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+declare <4 x float> @llvm.loongarch.lsx.vfmina.s(<4 x float>, <4 x float>)
+
+define <4 x float> @lsx_vfmina_s(<4 x float> %va, <4 x float> %vb) nounwind {
+; CHECK-LABEL: lsx_vfmina_s:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vfmina.s $vr0, $vr0, $vr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x float> @llvm.loongarch.lsx.vfmina.s(<4 x float> %va, <4 x float> %vb)
+  ret <4 x float> %res
+}
+
+declare <2 x double> @llvm.loongarch.lsx.vfmina.d(<2 x double>, <2 x double>)
+
+define <2 x double> @lsx_vfmina_d(<2 x double> %va, <2 x double> %vb) nounwind {
+; CHECK-LABEL: lsx_vfmina_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vfmina.d $vr0, $vr0, $vr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <2 x double> @llvm.loongarch.lsx.vfmina.d(<2 x double> %va, <2 x double> %vb)
+  ret <2 x double> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmsub.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmsub.ll
new file mode 100644
index 0000000000000000000000000000000000000000..856ca9cadbd90583000e00a556924a22fc7f76ad
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmsub.ll
@@ -0,0 +1,26 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+declare <4 x float> @llvm.loongarch.lsx.vfmsub.s(<4 x float>, <4 x float>, <4 x float>)
+
+define <4 x float> @lsx_vfmsub_s(<4 x float> %va, <4 x float> %vb, <4 x float> %vc) nounwind {
+; CHECK-LABEL: lsx_vfmsub_s:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vfmsub.s $vr0, $vr0, $vr1, $vr2
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x float> @llvm.loongarch.lsx.vfmsub.s(<4 x float> %va, <4 x float> %vb, <4 x float> %vc)
+  ret <4 x float> %res
+}
+
+declare <2 x double> @llvm.loongarch.lsx.vfmsub.d(<2 x double>, <2 x double>, <2 x double>)
+
+define <2 x double> @lsx_vfmsub_d(<2 x double> %va, <2 x double> %vb, <2 x double> %vc) nounwind {
+; CHECK-LABEL: lsx_vfmsub_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vfmsub.d $vr0, $vr0, $vr1, $vr2
+; CHECK-NEXT:    ret
+entry:
+  %res = call <2 x double> @llvm.loongarch.lsx.vfmsub.d(<2 x double> %va, <2 x double> %vb, <2 x double> %vc)
+  ret <2 x double> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmul.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmul.ll
new file mode 100644
index 0000000000000000000000000000000000000000..1e6c4c77d536b4a8584db52120bd56ac535c0ad7
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fmul.ll
@@ -0,0 +1,26 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+declare <4 x float> @llvm.loongarch.lsx.vfmul.s(<4 x float>, <4 x float>)
+
+define <4 x float> @lsx_vfmul_s(<4 x float> %va, <4 x float> %vb) nounwind {
+; CHECK-LABEL: lsx_vfmul_s:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vfmul.s $vr0, $vr0, $vr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x float> @llvm.loongarch.lsx.vfmul.s(<4 x float> %va, <4 x float> %vb)
+  ret <4 x float> %res
+}
+
+declare <2 x double> @llvm.loongarch.lsx.vfmul.d(<2 x double>, <2 x double>)
+
+define <2 x double> @lsx_vfmul_d(<2 x double> %va, <2 x double> %vb) nounwind {
+; CHECK-LABEL: lsx_vfmul_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vfmul.d $vr0, $vr0, $vr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <2 x double> @llvm.loongarch.lsx.vfmul.d(<2 x double> %va, <2 x double> %vb)
+  ret <2 x double> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fnmadd.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fnmadd.ll
new file mode 100644
index 0000000000000000000000000000000000000000..e1a9ea78ef9db50ac4a36c375d0935bd5a113796
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fnmadd.ll
@@ -0,0 +1,26 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+declare <4 x float> @llvm.loongarch.lsx.vfnmadd.s(<4 x float>, <4 x float>, <4 x float>)
+
+define <4 x float> @lsx_vfnmadd_s(<4 x float> %va, <4 x float> %vb, <4 x float> %vc) nounwind {
+; CHECK-LABEL: lsx_vfnmadd_s:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vfnmadd.s $vr0, $vr0, $vr1, $vr2
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x float> @llvm.loongarch.lsx.vfnmadd.s(<4 x float> %va, <4 x float> %vb, <4 x float> %vc)
+  ret <4 x float> %res
+}
+
+declare <2 x double> @llvm.loongarch.lsx.vfnmadd.d(<2 x double>, <2 x double>, <2 x double>)
+
+define <2 x double> @lsx_vfnmadd_d(<2 x double> %va, <2 x double> %vb, <2 x double> %vc) nounwind {
+; CHECK-LABEL: lsx_vfnmadd_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vfnmadd.d $vr0, $vr0, $vr1, $vr2
+; CHECK-NEXT:    ret
+entry:
+  %res = call <2 x double> @llvm.loongarch.lsx.vfnmadd.d(<2 x double> %va, <2 x double> %vb, <2 x double> %vc)
+  ret <2 x double> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fnmsub.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fnmsub.ll
new file mode 100644
index 0000000000000000000000000000000000000000..46db0f4a50613abb9db42eff39c8a6b766c7b2df
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fnmsub.ll
@@ -0,0 +1,26 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+declare <4 x float> @llvm.loongarch.lsx.vfnmsub.s(<4 x float>, <4 x float>, <4 x float>)
+
+define <4 x float> @lsx_vfnmsub_s(<4 x float> %va, <4 x float> %vb, <4 x float> %vc) nounwind {
+; CHECK-LABEL: lsx_vfnmsub_s:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vfnmsub.s $vr0, $vr0, $vr1, $vr2
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x float> @llvm.loongarch.lsx.vfnmsub.s(<4 x float> %va, <4 x float> %vb, <4 x float> %vc)
+  ret <4 x float> %res
+}
+
+declare <2 x double> @llvm.loongarch.lsx.vfnmsub.d(<2 x double>, <2 x double>, <2 x double>)
+
+define <2 x double> @lsx_vfnmsub_d(<2 x double> %va, <2 x double> %vb, <2 x double> %vc) nounwind {
+; CHECK-LABEL: lsx_vfnmsub_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vfnmsub.d $vr0, $vr0, $vr1, $vr2
+; CHECK-NEXT:    ret
+entry:
+  %res = call <2 x double> @llvm.loongarch.lsx.vfnmsub.d(<2 x double> %va, <2 x double> %vb, <2 x double> %vc)
+  ret <2 x double> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frecip.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frecip.ll
new file mode 100644
index 0000000000000000000000000000000000000000..669fde5912d4b995e4491d1d2128562c68603dff
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frecip.ll
@@ -0,0 +1,26 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+declare <4 x float> @llvm.loongarch.lsx.vfrecip.s(<4 x float>)
+
+define <4 x float> @lsx_vfrecip_s(<4 x float> %va) nounwind {
+; CHECK-LABEL: lsx_vfrecip_s:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vfrecip.s $vr0, $vr0
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x float> @llvm.loongarch.lsx.vfrecip.s(<4 x float> %va)
+  ret <4 x float> %res
+}
+
+declare <2 x double> @llvm.loongarch.lsx.vfrecip.d(<2 x double>)
+
+define <2 x double> @lsx_vfrecip_d(<2 x double> %va) nounwind {
+; CHECK-LABEL: lsx_vfrecip_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vfrecip.d $vr0, $vr0
+; CHECK-NEXT:    ret
+entry:
+  %res = call <2 x double> @llvm.loongarch.lsx.vfrecip.d(<2 x double> %va)
+  ret <2 x double> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frint.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frint.ll
new file mode 100644
index 0000000000000000000000000000000000000000..8d872fc7296255166545edb7cdd06d530790a835
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frint.ll
@@ -0,0 +1,122 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+declare <4 x float> @llvm.loongarch.lsx.vfrintrne.s(<4 x float>)
+
+define <4 x float> @lsx_vfrintrne_s(<4 x float> %va) nounwind {
+; CHECK-LABEL: lsx_vfrintrne_s:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vfrintrne.s $vr0, $vr0
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x float> @llvm.loongarch.lsx.vfrintrne.s(<4 x float> %va)
+  ret <4 x float> %res
+}
+
+declare <2 x double> @llvm.loongarch.lsx.vfrintrne.d(<2 x double>)
+
+define <2 x double> @lsx_vfrintrne_d(<2 x double> %va) nounwind {
+; CHECK-LABEL: lsx_vfrintrne_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vfrintrne.d $vr0, $vr0
+; CHECK-NEXT:    ret
+entry:
+  %res = call <2 x double> @llvm.loongarch.lsx.vfrintrne.d(<2 x double> %va)
+  ret <2 x double> %res
+}
+
+declare <4 x float> @llvm.loongarch.lsx.vfrintrz.s(<4 x float>)
+
+define <4 x float> @lsx_vfrintrz_s(<4 x float> %va) nounwind {
+; CHECK-LABEL: lsx_vfrintrz_s:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vfrintrz.s $vr0, $vr0
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x float> @llvm.loongarch.lsx.vfrintrz.s(<4 x float> %va)
+  ret <4 x float> %res
+}
+
+declare <2 x double> @llvm.loongarch.lsx.vfrintrz.d(<2 x double>)
+
+define <2 x double> @lsx_vfrintrz_d(<2 x double> %va) nounwind {
+; CHECK-LABEL: lsx_vfrintrz_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vfrintrz.d $vr0, $vr0
+; CHECK-NEXT:    ret
+entry:
+  %res = call <2 x double> @llvm.loongarch.lsx.vfrintrz.d(<2 x double> %va)
+  ret <2 x double> %res
+}
+
+declare <4 x float> @llvm.loongarch.lsx.vfrintrp.s(<4 x float>)
+
+define <4 x float> @lsx_vfrintrp_s(<4 x float> %va) nounwind {
+; CHECK-LABEL: lsx_vfrintrp_s:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vfrintrp.s $vr0, $vr0
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x float> @llvm.loongarch.lsx.vfrintrp.s(<4 x float> %va)
+  ret <4 x float> %res
+}
+
+declare <2 x double> @llvm.loongarch.lsx.vfrintrp.d(<2 x double>)
+
+define <2 x double> @lsx_vfrintrp_d(<2 x double> %va) nounwind {
+; CHECK-LABEL: lsx_vfrintrp_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vfrintrp.d $vr0, $vr0
+; CHECK-NEXT:    ret
+entry:
+  %res = call <2 x double> @llvm.loongarch.lsx.vfrintrp.d(<2 x double> %va)
+  ret <2 x double> %res
+}
+
+declare <4 x float> @llvm.loongarch.lsx.vfrintrm.s(<4 x float>)
+
+define <4 x float> @lsx_vfrintrm_s(<4 x float> %va) nounwind {
+; CHECK-LABEL: lsx_vfrintrm_s:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vfrintrm.s $vr0, $vr0
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x float> @llvm.loongarch.lsx.vfrintrm.s(<4 x float> %va)
+  ret <4 x float> %res
+}
+
+declare <2 x double> @llvm.loongarch.lsx.vfrintrm.d(<2 x double>)
+
+define <2 x double> @lsx_vfrintrm_d(<2 x double> %va) nounwind {
+; CHECK-LABEL: lsx_vfrintrm_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vfrintrm.d $vr0, $vr0
+; CHECK-NEXT:    ret
+entry:
+  %res = call <2 x double> @llvm.loongarch.lsx.vfrintrm.d(<2 x double> %va)
+  ret <2 x double> %res
+}
+
+declare <4 x float> @llvm.loongarch.lsx.vfrint.s(<4 x float>)
+
+define <4 x float> @lsx_vfrint_s(<4 x float> %va) nounwind {
+; CHECK-LABEL: lsx_vfrint_s:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vfrint.s $vr0, $vr0
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x float> @llvm.loongarch.lsx.vfrint.s(<4 x float> %va)
+  ret <4 x float> %res
+}
+
+declare <2 x double> @llvm.loongarch.lsx.vfrint.d(<2 x double>)
+
+define <2 x double> @lsx_vfrint_d(<2 x double> %va) nounwind {
+; CHECK-LABEL: lsx_vfrint_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vfrint.d $vr0, $vr0
+; CHECK-NEXT:    ret
+entry:
+  %res = call <2 x double> @llvm.loongarch.lsx.vfrint.d(<2 x double> %va)
+  ret <2 x double> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frsqrt.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frsqrt.ll
new file mode 100644
index 0000000000000000000000000000000000000000..326d87308b0ba61b20e5831eaf6e8c6e7f40cc26
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frsqrt.ll
@@ -0,0 +1,26 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+declare <4 x float> @llvm.loongarch.lsx.vfrsqrt.s(<4 x float>)
+
+define <4 x float> @lsx_vfrsqrt_s(<4 x float> %va) nounwind {
+; CHECK-LABEL: lsx_vfrsqrt_s:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vfrsqrt.s $vr0, $vr0
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x float> @llvm.loongarch.lsx.vfrsqrt.s(<4 x float> %va)
+  ret <4 x float> %res
+}
+
+declare <2 x double> @llvm.loongarch.lsx.vfrsqrt.d(<2 x double>)
+
+define <2 x double> @lsx_vfrsqrt_d(<2 x double> %va) nounwind {
+; CHECK-LABEL: lsx_vfrsqrt_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vfrsqrt.d $vr0, $vr0
+; CHECK-NEXT:    ret
+entry:
+  %res = call <2 x double> @llvm.loongarch.lsx.vfrsqrt.d(<2 x double> %va)
+  ret <2 x double> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frstp-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frstp-invalid-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..0184c855c9c100d4cfb109b05a62547502753c29
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frstp-invalid-imm.ll
@@ -0,0 +1,33 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s
+
+declare <16 x i8> @llvm.loongarch.lsx.vfrstpi.b(<16 x i8>, <16 x i8>, i32)
+
+define <16 x i8> @lsx_vfrstpi_b_lo(<16 x i8> %va, <16 x i8> %vb) nounwind {
+; CHECK: llvm.loongarch.lsx.vfrstpi.b: argument out of range
+entry:
+  %res = call <16 x i8> @llvm.loongarch.lsx.vfrstpi.b(<16 x i8> %va, <16 x i8> %vb, i32 -1)
+  ret <16 x i8> %res
+}
+
+define <16 x i8> @lsx_vfrstpi_b_hi(<16 x i8> %va, <16 x i8> %vb) nounwind {
+; CHECK: llvm.loongarch.lsx.vfrstpi.b: argument out of range
+entry:
+  %res = call <16 x i8> @llvm.loongarch.lsx.vfrstpi.b(<16 x i8> %va, <16 x i8> %vb, i32 32)
+  ret <16 x i8> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vfrstpi.h(<8 x i16>, <8 x i16>, i32)
+
+define <8 x i16> @lsx_vfrstpi_h_lo(<8 x i16> %va, <8 x i16> %vb) nounwind {
+; CHECK: llvm.loongarch.lsx.vfrstpi.h: argument out of range
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vfrstpi.h(<8 x i16> %va, <8 x i16> %vb, i32 -1)
+  ret <8 x i16> %res
+}
+
+define <8 x i16> @lsx_vfrstpi_h_hi(<8 x i16> %va, <8 x i16> %vb) nounwind {
+; CHECK: llvm.loongarch.lsx.vfrstpi.h: argument out of range
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vfrstpi.h(<8 x i16> %va, <8 x i16> %vb, i32 32)
+  ret <8 x i16> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frstp-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frstp-non-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..9583f672a305e64adc4d4dec59264faa2cd0f19d
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frstp-non-imm.ll
@@ -0,0 +1,19 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s
+
+declare <16 x i8> @llvm.loongarch.lsx.vfrstpi.b(<16 x i8>, <16 x i8>, i32)
+
+define <16 x i8> @lsx_vfrstpi_b(<16 x i8> %va, <16 x i8> %vb, i32 %c) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <16 x i8> @llvm.loongarch.lsx.vfrstpi.b(<16 x i8> %va, <16 x i8> %vb, i32 %c)
+  ret <16 x i8> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vfrstpi.h(<8 x i16>, <8 x i16>, i32)
+
+define <8 x i16> @lsx_vfrstpi_h(<8 x i16> %va, <8 x i16> %vb, i32 %c) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vfrstpi.h(<8 x i16> %va, <8 x i16> %vb, i32 %c)
+  ret <8 x i16> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frstp.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frstp.ll
new file mode 100644
index 0000000000000000000000000000000000000000..5c072b194d4fef066f9e13993f2055ecdc5d7531
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-frstp.ll
@@ -0,0 +1,50 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+declare <16 x i8> @llvm.loongarch.lsx.vfrstp.b(<16 x i8>, <16 x i8>, <16 x i8>)
+
+define <16 x i8> @lsx_vfrstp_b(<16 x i8> %va, <16 x i8> %vb, <16 x i8> %vc) nounwind {
+; CHECK-LABEL: lsx_vfrstp_b:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vfrstp.b $vr0, $vr1, $vr2
+; CHECK-NEXT:    ret
+entry:
+  %res = call <16 x i8> @llvm.loongarch.lsx.vfrstp.b(<16 x i8> %va, <16 x i8> %vb, <16 x i8> %vc)
+  ret <16 x i8> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vfrstp.h(<8 x i16>, <8 x i16>, <8 x i16>)
+
+define <8 x i16> @lsx_vfrstp_h(<8 x i16> %va, <8 x i16> %vb, <8 x i16> %vc) nounwind {
+; CHECK-LABEL: lsx_vfrstp_h:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vfrstp.h $vr0, $vr1, $vr2
+; CHECK-NEXT:    ret
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vfrstp.h(<8 x i16> %va, <8 x i16> %vb, <8 x i16> %vc)
+  ret <8 x i16> %res
+}
+
+declare <16 x i8> @llvm.loongarch.lsx.vfrstpi.b(<16 x i8>, <16 x i8>, i32)
+
+define <16 x i8> @lsx_vfrstpi_b(<16 x i8> %va, <16 x i8> %vb) nounwind {
+; CHECK-LABEL: lsx_vfrstpi_b:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vfrstpi.b $vr0, $vr1, 1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <16 x i8> @llvm.loongarch.lsx.vfrstpi.b(<16 x i8> %va, <16 x i8> %vb, i32 1)
+  ret <16 x i8> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vfrstpi.h(<8 x i16>, <8 x i16>, i32)
+
+define <8 x i16> @lsx_vfrstpi_h(<8 x i16> %va, <8 x i16> %vb) nounwind {
+; CHECK-LABEL: lsx_vfrstpi_h:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vfrstpi.h $vr0, $vr1, 31
+; CHECK-NEXT:    ret
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vfrstpi.h(<8 x i16> %va, <8 x i16> %vb, i32 31)
+  ret <8 x i16> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fsqrt.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fsqrt.ll
new file mode 100644
index 0000000000000000000000000000000000000000..55bffba9e99e9c1c3b5517f34b4c3d47729cb681
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fsqrt.ll
@@ -0,0 +1,26 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+declare <4 x float> @llvm.loongarch.lsx.vfsqrt.s(<4 x float>)
+
+define <4 x float> @lsx_vfsqrt_s(<4 x float> %va) nounwind {
+; CHECK-LABEL: lsx_vfsqrt_s:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vfsqrt.s $vr0, $vr0
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x float> @llvm.loongarch.lsx.vfsqrt.s(<4 x float> %va)
+  ret <4 x float> %res
+}
+
+declare <2 x double> @llvm.loongarch.lsx.vfsqrt.d(<2 x double>)
+
+define <2 x double> @lsx_vfsqrt_d(<2 x double> %va) nounwind {
+; CHECK-LABEL: lsx_vfsqrt_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vfsqrt.d $vr0, $vr0
+; CHECK-NEXT:    ret
+entry:
+  %res = call <2 x double> @llvm.loongarch.lsx.vfsqrt.d(<2 x double> %va)
+  ret <2 x double> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fsub.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fsub.ll
new file mode 100644
index 0000000000000000000000000000000000000000..2beba4a70dc960ef8de6072fd560cc8d0554e8dd
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-fsub.ll
@@ -0,0 +1,26 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+declare <4 x float> @llvm.loongarch.lsx.vfsub.s(<4 x float>, <4 x float>)
+
+define <4 x float> @lsx_vfsub_s(<4 x float> %va, <4 x float> %vb) nounwind {
+; CHECK-LABEL: lsx_vfsub_s:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vfsub.s $vr0, $vr0, $vr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x float> @llvm.loongarch.lsx.vfsub.s(<4 x float> %va, <4 x float> %vb)
+  ret <4 x float> %res
+}
+
+declare <2 x double> @llvm.loongarch.lsx.vfsub.d(<2 x double>, <2 x double>)
+
+define <2 x double> @lsx_vfsub_d(<2 x double> %va, <2 x double> %vb) nounwind {
+; CHECK-LABEL: lsx_vfsub_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vfsub.d $vr0, $vr0, $vr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <2 x double> @llvm.loongarch.lsx.vfsub.d(<2 x double> %va, <2 x double> %vb)
+  ret <2 x double> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ftint.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ftint.ll
new file mode 100644
index 0000000000000000000000000000000000000000..2a494cd7fa874b0d3589bf0bdeb1e5bf9bd062be
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ftint.ll
@@ -0,0 +1,350 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+declare <4 x i32> @llvm.loongarch.lsx.vftintrne.w.s(<4 x float>)
+
+define <4 x i32> @lsx_vftintrne_w_s(<4 x float> %va) nounwind {
+; CHECK-LABEL: lsx_vftintrne_w_s:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vftintrne.w.s $vr0, $vr0
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vftintrne.w.s(<4 x float> %va)
+  ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vftintrne.l.d(<2 x double>)
+
+define <2 x i64> @lsx_vftintrne_l_d(<2 x double> %va) nounwind {
+; CHECK-LABEL: lsx_vftintrne_l_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vftintrne.l.d $vr0, $vr0
+; CHECK-NEXT:    ret
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vftintrne.l.d(<2 x double> %va)
+  ret <2 x i64> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vftintrz.w.s(<4 x float>)
+
+define <4 x i32> @lsx_vftintrz_w_s(<4 x float> %va) nounwind {
+; CHECK-LABEL: lsx_vftintrz_w_s:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vftintrz.w.s $vr0, $vr0
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vftintrz.w.s(<4 x float> %va)
+  ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vftintrz.l.d(<2 x double>)
+
+define <2 x i64> @lsx_vftintrz_l_d(<2 x double> %va) nounwind {
+; CHECK-LABEL: lsx_vftintrz_l_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vftintrz.l.d $vr0, $vr0
+; CHECK-NEXT:    ret
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vftintrz.l.d(<2 x double> %va)
+  ret <2 x i64> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vftintrp.w.s(<4 x float>)
+
+define <4 x i32> @lsx_vftintrp_w_s(<4 x float> %va) nounwind {
+; CHECK-LABEL: lsx_vftintrp_w_s:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vftintrp.w.s $vr0, $vr0
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vftintrp.w.s(<4 x float> %va)
+  ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vftintrp.l.d(<2 x double>)
+
+define <2 x i64> @lsx_vftintrp_l_d(<2 x double> %va) nounwind {
+; CHECK-LABEL: lsx_vftintrp_l_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vftintrp.l.d $vr0, $vr0
+; CHECK-NEXT:    ret
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vftintrp.l.d(<2 x double> %va)
+  ret <2 x i64> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vftintrm.w.s(<4 x float>)
+
+define <4 x i32> @lsx_vftintrm_w_s(<4 x float> %va) nounwind {
+; CHECK-LABEL: lsx_vftintrm_w_s:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vftintrm.w.s $vr0, $vr0
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vftintrm.w.s(<4 x float> %va)
+  ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vftintrm.l.d(<2 x double>)
+
+define <2 x i64> @lsx_vftintrm_l_d(<2 x double> %va) nounwind {
+; CHECK-LABEL: lsx_vftintrm_l_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vftintrm.l.d $vr0, $vr0
+; CHECK-NEXT:    ret
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vftintrm.l.d(<2 x double> %va)
+  ret <2 x i64> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vftint.w.s(<4 x float>)
+
+define <4 x i32> @lsx_vftint_w_s(<4 x float> %va) nounwind {
+; CHECK-LABEL: lsx_vftint_w_s:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vftint.w.s $vr0, $vr0
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vftint.w.s(<4 x float> %va)
+  ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vftint.l.d(<2 x double>)
+
+define <2 x i64> @lsx_vftint_l_d(<2 x double> %va) nounwind {
+; CHECK-LABEL: lsx_vftint_l_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vftint.l.d $vr0, $vr0
+; CHECK-NEXT:    ret
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vftint.l.d(<2 x double> %va)
+  ret <2 x i64> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vftintrz.wu.s(<4 x float>)
+
+define <4 x i32> @lsx_vftintrz_wu_s(<4 x float> %va) nounwind {
+; CHECK-LABEL: lsx_vftintrz_wu_s:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vftintrz.wu.s $vr0, $vr0
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vftintrz.wu.s(<4 x float> %va)
+  ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vftintrz.lu.d(<2 x double>)
+
+define <2 x i64> @lsx_vftintrz_lu_d(<2 x double> %va) nounwind {
+; CHECK-LABEL: lsx_vftintrz_lu_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vftintrz.lu.d $vr0, $vr0
+; CHECK-NEXT:    ret
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vftintrz.lu.d(<2 x double> %va)
+  ret <2 x i64> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vftint.wu.s(<4 x float>)
+
+define <4 x i32> @lsx_vftint_wu_s(<4 x float> %va) nounwind {
+; CHECK-LABEL: lsx_vftint_wu_s:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vftint.wu.s $vr0, $vr0
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vftint.wu.s(<4 x float> %va)
+  ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vftint.lu.d(<2 x double>)
+
+define <2 x i64> @lsx_vftint_lu_d(<2 x double> %va) nounwind {
+; CHECK-LABEL: lsx_vftint_lu_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vftint.lu.d $vr0, $vr0
+; CHECK-NEXT:    ret
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vftint.lu.d(<2 x double> %va)
+  ret <2 x i64> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vftintrne.w.d(<2 x double>, <2 x double>)
+
+define <4 x i32> @lsx_vftintrne_w_d(<2 x double> %va, <2 x double> %vb) nounwind {
+; CHECK-LABEL: lsx_vftintrne_w_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vftintrne.w.d $vr0, $vr0, $vr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vftintrne.w.d(<2 x double> %va, <2 x double> %vb)
+  ret <4 x i32> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vftintrz.w.d(<2 x double>, <2 x double>)
+
+define <4 x i32> @lsx_vftintrz_w_d(<2 x double> %va, <2 x double> %vb) nounwind {
+; CHECK-LABEL: lsx_vftintrz_w_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vftintrz.w.d $vr0, $vr0, $vr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vftintrz.w.d(<2 x double> %va, <2 x double> %vb)
+  ret <4 x i32> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vftintrp.w.d(<2 x double>, <2 x double>)
+
+define <4 x i32> @lsx_vftintrp_w_d(<2 x double> %va, <2 x double> %vb) nounwind {
+; CHECK-LABEL: lsx_vftintrp_w_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vftintrp.w.d $vr0, $vr0, $vr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vftintrp.w.d(<2 x double> %va, <2 x double> %vb)
+  ret <4 x i32> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vftintrm.w.d(<2 x double>, <2 x double>)
+
+define <4 x i32> @lsx_vftintrm_w_d(<2 x double> %va, <2 x double> %vb) nounwind {
+; CHECK-LABEL: lsx_vftintrm_w_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vftintrm.w.d $vr0, $vr0, $vr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vftintrm.w.d(<2 x double> %va, <2 x double> %vb)
+  ret <4 x i32> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vftint.w.d(<2 x double>, <2 x double>)
+
+define <4 x i32> @lsx_vftint_w_d(<2 x double> %va, <2 x double> %vb) nounwind {
+; CHECK-LABEL: lsx_vftint_w_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vftint.w.d $vr0, $vr0, $vr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vftint.w.d(<2 x double> %va, <2 x double> %vb)
+  ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vftintrnel.l.s(<4 x float>)
+
+define <2 x i64> @lsx_vftintrnel_l_s(<4 x float> %va) nounwind {
+; CHECK-LABEL: lsx_vftintrnel_l_s:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vftintrnel.l.s $vr0, $vr0
+; CHECK-NEXT:    ret
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vftintrnel.l.s(<4 x float> %va)
+  ret <2 x i64> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vftintrneh.l.s(<4 x float>)
+
+define <2 x i64> @lsx_vftintrneh_l_s(<4 x float> %va) nounwind {
+; CHECK-LABEL: lsx_vftintrneh_l_s:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vftintrneh.l.s $vr0, $vr0
+; CHECK-NEXT:    ret
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vftintrneh.l.s(<4 x float> %va)
+  ret <2 x i64> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vftintrzl.l.s(<4 x float>)
+
+define <2 x i64> @lsx_vftintrzl_l_s(<4 x float> %va) nounwind {
+; CHECK-LABEL: lsx_vftintrzl_l_s:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vftintrzl.l.s $vr0, $vr0
+; CHECK-NEXT:    ret
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vftintrzl.l.s(<4 x float> %va)
+  ret <2 x i64> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vftintrzh.l.s(<4 x float>)
+
+define <2 x i64> @lsx_vftintrzh_l_s(<4 x float> %va) nounwind {
+; CHECK-LABEL: lsx_vftintrzh_l_s:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vftintrzh.l.s $vr0, $vr0
+; CHECK-NEXT:    ret
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vftintrzh.l.s(<4 x float> %va)
+  ret <2 x i64> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vftintrpl.l.s(<4 x float>)
+
+define <2 x i64> @lsx_vftintrpl_l_s(<4 x float> %va) nounwind {
+; CHECK-LABEL: lsx_vftintrpl_l_s:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vftintrpl.l.s $vr0, $vr0
+; CHECK-NEXT:    ret
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vftintrpl.l.s(<4 x float> %va)
+  ret <2 x i64> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vftintrph.l.s(<4 x float>)
+
+define <2 x i64> @lsx_vftintrph_l_s(<4 x float> %va) nounwind {
+; CHECK-LABEL: lsx_vftintrph_l_s:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vftintrph.l.s $vr0, $vr0
+; CHECK-NEXT:    ret
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vftintrph.l.s(<4 x float> %va)
+  ret <2 x i64> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vftintrml.l.s(<4 x float>)
+
+define <2 x i64> @lsx_vftintrml_l_s(<4 x float> %va) nounwind {
+; CHECK-LABEL: lsx_vftintrml_l_s:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vftintrml.l.s $vr0, $vr0
+; CHECK-NEXT:    ret
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vftintrml.l.s(<4 x float> %va)
+  ret <2 x i64> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vftintrmh.l.s(<4 x float>)
+
+define <2 x i64> @lsx_vftintrmh_l_s(<4 x float> %va) nounwind {
+; CHECK-LABEL: lsx_vftintrmh_l_s:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vftintrmh.l.s $vr0, $vr0
+; CHECK-NEXT:    ret
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vftintrmh.l.s(<4 x float> %va)
+  ret <2 x i64> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vftintl.l.s(<4 x float>)
+
+define <2 x i64> @lsx_vftintl_l_s(<4 x float> %va) nounwind {
+; CHECK-LABEL: lsx_vftintl_l_s:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vftintl.l.s $vr0, $vr0
+; CHECK-NEXT:    ret
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vftintl.l.s(<4 x float> %va)
+  ret <2 x i64> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vftinth.l.s(<4 x float>)
+
+define <2 x i64> @lsx_vftinth_l_s(<4 x float> %va) nounwind {
+; CHECK-LABEL: lsx_vftinth_l_s:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vftinth.l.s $vr0, $vr0
+; CHECK-NEXT:    ret
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vftinth.l.s(<4 x float> %va)
+  ret <2 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-haddw.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-haddw.ll
new file mode 100644
index 0000000000000000000000000000000000000000..05725582334ae383518965e678dac9bc602e49cb
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-haddw.ll
@@ -0,0 +1,98 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+declare <8 x i16> @llvm.loongarch.lsx.vhaddw.h.b(<16 x i8>, <16 x i8>)
+
+define <8 x i16> @lsx_vhaddw_h_b(<16 x i8> %va, <16 x i8> %vb) nounwind {
+; CHECK-LABEL: lsx_vhaddw_h_b:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vhaddw.h.b $vr0, $vr0, $vr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vhaddw.h.b(<16 x i8> %va, <16 x i8> %vb)
+  ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vhaddw.w.h(<8 x i16>, <8 x i16>)
+
+define <4 x i32> @lsx_vhaddw_w_h(<8 x i16> %va, <8 x i16> %vb) nounwind {
+; CHECK-LABEL: lsx_vhaddw_w_h:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vhaddw.w.h $vr0, $vr0, $vr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vhaddw.w.h(<8 x i16> %va, <8 x i16> %vb)
+  ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vhaddw.d.w(<4 x i32>, <4 x i32>)
+
+define <2 x i64> @lsx_vhaddw_d_w(<4 x i32> %va, <4 x i32> %vb) nounwind {
+; CHECK-LABEL: lsx_vhaddw_d_w:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vhaddw.d.w $vr0, $vr0, $vr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vhaddw.d.w(<4 x i32> %va, <4 x i32> %vb)
+  ret <2 x i64> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vhaddw.q.d(<2 x i64>, <2 x i64>)
+
+define <2 x i64> @lsx_vhaddw_q_d(<2 x i64> %va, <2 x i64> %vb) nounwind {
+; CHECK-LABEL: lsx_vhaddw_q_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vhaddw.q.d $vr0, $vr0, $vr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vhaddw.q.d(<2 x i64> %va, <2 x i64> %vb)
+  ret <2 x i64> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vhaddw.hu.bu(<16 x i8>, <16 x i8>)
+
+define <8 x i16> @lsx_vhaddw_hu_bu(<16 x i8> %va, <16 x i8> %vb) nounwind {
+; CHECK-LABEL: lsx_vhaddw_hu_bu:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vhaddw.hu.bu $vr0, $vr0, $vr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vhaddw.hu.bu(<16 x i8> %va, <16 x i8> %vb)
+  ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vhaddw.wu.hu(<8 x i16>, <8 x i16>)
+
+define <4 x i32> @lsx_vhaddw_wu_hu(<8 x i16> %va, <8 x i16> %vb) nounwind {
+; CHECK-LABEL: lsx_vhaddw_wu_hu:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vhaddw.wu.hu $vr0, $vr0, $vr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vhaddw.wu.hu(<8 x i16> %va, <8 x i16> %vb)
+  ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vhaddw.du.wu(<4 x i32>, <4 x i32>)
+
+define <2 x i64> @lsx_vhaddw_du_wu(<4 x i32> %va, <4 x i32> %vb) nounwind {
+; CHECK-LABEL: lsx_vhaddw_du_wu:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vhaddw.du.wu $vr0, $vr0, $vr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vhaddw.du.wu(<4 x i32> %va, <4 x i32> %vb)
+  ret <2 x i64> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vhaddw.qu.du(<2 x i64>, <2 x i64>)
+
+define <2 x i64> @lsx_vhaddw_qu_du(<2 x i64> %va, <2 x i64> %vb) nounwind {
+; CHECK-LABEL: lsx_vhaddw_qu_du:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vhaddw.qu.du $vr0, $vr0, $vr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vhaddw.qu.du(<2 x i64> %va, <2 x i64> %vb)
  ret <2 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-hsubw.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-hsubw.ll
new file mode 100644
index 0000000000000000000000000000000000000000..dd5815b2ea85a6aa5ec2fd6abdc6971e536f64c3
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-hsubw.ll
@@ -0,0 +1,98 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+declare <8 x i16> @llvm.loongarch.lsx.vhsubw.h.b(<16 x i8>, <16 x i8>)
+
+define <8 x i16> @lsx_vhsubw_h_b(<16 x i8> %va, <16 x i8> %vb) nounwind {
+; CHECK-LABEL: lsx_vhsubw_h_b:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vhsubw.h.b $vr0, $vr0, $vr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vhsubw.h.b(<16 x i8> %va, <16 x i8> %vb)
+  ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vhsubw.w.h(<8 x i16>, <8 x i16>)
+
+define <4 x i32> @lsx_vhsubw_w_h(<8 x i16> %va, <8 x i16> %vb) nounwind {
+; CHECK-LABEL: lsx_vhsubw_w_h:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vhsubw.w.h $vr0, $vr0, $vr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vhsubw.w.h(<8 x i16> %va, <8 x i16> %vb)
+  ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vhsubw.d.w(<4 x i32>, <4 x i32>)
+
+define <2 x i64> @lsx_vhsubw_d_w(<4 x i32> %va, <4 x i32> %vb) nounwind {
+; CHECK-LABEL: lsx_vhsubw_d_w:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vhsubw.d.w $vr0, $vr0, $vr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vhsubw.d.w(<4 x i32> %va, <4 x i32> %vb)
+  ret <2 x i64> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vhsubw.q.d(<2 x i64>, <2 x i64>)
+
+define <2 x i64> @lsx_vhsubw_q_d(<2 x i64> %va, <2 x i64> %vb) nounwind {
+; CHECK-LABEL: lsx_vhsubw_q_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vhsubw.q.d $vr0, $vr0, $vr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vhsubw.q.d(<2 x i64> %va, <2 x i64> %vb)
+  ret <2 x i64> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vhsubw.hu.bu(<16 x i8>, <16 x i8>)
+
+define <8 x i16> @lsx_vhsubw_hu_bu(<16 x i8> %va, <16 x i8> %vb) nounwind {
+; CHECK-LABEL: lsx_vhsubw_hu_bu:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vhsubw.hu.bu $vr0, $vr0, $vr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vhsubw.hu.bu(<16 x i8> %va, <16 x i8> %vb)
+  ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vhsubw.wu.hu(<8 x i16>, <8 x i16>)
+
+define <4 x i32> @lsx_vhsubw_wu_hu(<8 x i16> %va, <8 x i16> %vb) nounwind {
+; CHECK-LABEL: lsx_vhsubw_wu_hu:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vhsubw.wu.hu $vr0, $vr0, $vr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vhsubw.wu.hu(<8 x i16> %va, <8 x i16> %vb)
+  ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vhsubw.du.wu(<4 x i32>, <4 x i32>)
+
+define <2 x i64> @lsx_vhsubw_du_wu(<4 x i32> %va, <4 x i32> %vb) nounwind {
+; CHECK-LABEL: lsx_vhsubw_du_wu:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vhsubw.du.wu $vr0, $vr0, $vr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vhsubw.du.wu(<4 x i32> %va, <4 x i32> %vb)
+  ret <2 x i64> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vhsubw.qu.du(<2 x i64>, <2 x i64>)
+
+define <2 x i64> @lsx_vhsubw_qu_du(<2 x i64> %va, <2 x i64> %vb) nounwind {
+; CHECK-LABEL: lsx_vhsubw_qu_du:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vhsubw.qu.du $vr0, $vr0, $vr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vhsubw.qu.du(<2 x i64> %va, <2 x i64> %vb)
+  ret <2 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ilv.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ilv.ll
new file mode 100644
index 0000000000000000000000000000000000000000..77b0b3484df8c47a2594746a705c8bddbbe5591f
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ilv.ll
@@ -0,0 +1,98 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+declare <16 x i8> @llvm.loongarch.lsx.vilvl.b(<16 x i8>, <16 x i8>)
+
+define <16 x i8> @lsx_vilvl_b(<16 x i8> %va, <16 x i8> %vb) nounwind {
+; CHECK-LABEL: lsx_vilvl_b:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vilvl.b $vr0, $vr0, $vr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <16 x i8> @llvm.loongarch.lsx.vilvl.b(<16 x i8> %va, <16 x i8> %vb)
+  ret <16 x i8> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vilvl.h(<8 x i16>, <8 x i16>)
+
+define <8 x i16> @lsx_vilvl_h(<8 x i16> %va, <8 x i16> %vb) nounwind {
+; CHECK-LABEL: lsx_vilvl_h:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vilvl.h $vr0, $vr0, $vr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vilvl.h(<8 x i16> %va, <8 x i16> %vb)
+  ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vilvl.w(<4 x i32>, <4 x i32>)
+
+define <4 x i32> @lsx_vilvl_w(<4 x i32> %va, <4 x i32> %vb) nounwind {
+; CHECK-LABEL: lsx_vilvl_w:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vilvl.w $vr0, $vr0, $vr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vilvl.w(<4 x i32> %va, <4 x i32> %vb)
+  ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vilvl.d(<2 x i64>, <2 x i64>)
+
+define <2 x i64> @lsx_vilvl_d(<2 x i64> %va, <2 x i64> %vb) nounwind {
+; CHECK-LABEL: lsx_vilvl_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vilvl.d $vr0, $vr0, $vr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vilvl.d(<2 x i64> %va, <2 x i64> %vb)
+  ret <2 x i64> %res
+}
+
+declare <16 x i8> @llvm.loongarch.lsx.vilvh.b(<16 x i8>, <16 x i8>)
+
+define <16 x i8> @lsx_vilvh_b(<16 x i8> %va, <16 x i8> %vb) nounwind {
+; CHECK-LABEL: lsx_vilvh_b:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vilvh.b $vr0, $vr0, $vr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <16 x i8> @llvm.loongarch.lsx.vilvh.b(<16 x i8> %va, <16 x i8> %vb)
+  ret <16 x i8> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vilvh.h(<8 x i16>, <8 x i16>)
+
+define <8 x i16> @lsx_vilvh_h(<8 x i16> %va, <8 x i16> %vb) nounwind {
+; CHECK-LABEL: lsx_vilvh_h:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vilvh.h $vr0, $vr0, $vr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vilvh.h(<8 x i16> %va, <8 x i16> %vb)
+  ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vilvh.w(<4 x i32>, <4 x i32>)
+
+define <4 x i32> @lsx_vilvh_w(<4 x i32> %va, <4 x i32> %vb) nounwind {
+; CHECK-LABEL: lsx_vilvh_w:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vilvh.w $vr0, $vr0, $vr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vilvh.w(<4 x i32> %va, <4 x i32> %vb)
+  ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vilvh.d(<2 x i64>, <2 x i64>)
+
+define <2 x i64> @lsx_vilvh_d(<2 x i64> %va, <2 x i64> %vb) nounwind {
+; CHECK-LABEL: lsx_vilvh_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vilvh.d $vr0, $vr0, $vr1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vilvh.d(<2 x i64> %va, <2 x i64> %vb)
+  ret <2 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-insgr2vr-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-insgr2vr-invalid-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..3d4f84fb6e0388ec81495d0f5ecafb5e28918548
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-insgr2vr-invalid-imm.ll
@@ -0,0 +1,65 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s
+
+declare <16 x i8> @llvm.loongarch.lsx.vinsgr2vr.b(<16 x i8>, i32, i32)
+
+define <16 x i8> @lsx_vinsgr2vr_b_lo(<16 x i8> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vinsgr2vr.b: argument out of range
+entry:
+  %res = call <16 x i8> @llvm.loongarch.lsx.vinsgr2vr.b(<16 x i8> %va, i32 1, i32 -1)
+  ret <16 x i8> %res
+}
+
+define <16 x i8> @lsx_vinsgr2vr_b_hi(<16 x i8> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vinsgr2vr.b: argument out of range
+entry:
+  %res = call <16 x i8> @llvm.loongarch.lsx.vinsgr2vr.b(<16 x i8> %va, i32 1, i32 16)
+  ret <16 x i8> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vinsgr2vr.h(<8 x i16>, i32, i32)
+
+define <8 x i16> @lsx_vinsgr2vr_h_lo(<8 x i16> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vinsgr2vr.h: argument out of range
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vinsgr2vr.h(<8 x i16> %va, i32 1, i32 -1)
+  ret <8 x i16> %res
+}
+
+define <8 x i16> @lsx_vinsgr2vr_h_hi(<8 x i16> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vinsgr2vr.h: argument out of range
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vinsgr2vr.h(<8 x i16> %va, i32 1, i32 8)
+  ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vinsgr2vr.w(<4 x i32>, i32, i32)
+
+define <4 x i32> @lsx_vinsgr2vr_w_lo(<4 x i32> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vinsgr2vr.w: argument out of range
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vinsgr2vr.w(<4 x i32> %va, i32 1, i32 -1)
+  ret <4 x i32> %res
+}
+
+define <4 x i32> @lsx_vinsgr2vr_w_hi(<4 x i32> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vinsgr2vr.w: argument out of range
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vinsgr2vr.w(<4 x i32> %va, i32 1, i32 4)
+  ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vinsgr2vr.d(<2 x i64>, i64, i32)
+
+define <2 x i64> @lsx_vinsgr2vr_d_lo(<2 x i64> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vinsgr2vr.d: argument out of range
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vinsgr2vr.d(<2 x i64> %va, i64 1, i32 -1)
+  ret <2 x i64> %res
+}
+
+define <2 x i64> @lsx_vinsgr2vr_d_hi(<2 x i64> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vinsgr2vr.d: argument out of range
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vinsgr2vr.d(<2 x i64> %va, i64 1, i32 2)
+  ret <2 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-insgr2vr-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-insgr2vr-non-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..2a4c2218de8c9e93c485b355b4f61fcdfdba20a5
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-insgr2vr-non-imm.ll
@@ -0,0 +1,37 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s
+
+declare <16 x i8> @llvm.loongarch.lsx.vinsgr2vr.b(<16 x i8>, i32, i32)
+
+define <16 x i8> @lsx_vinsgr2vr_b(<16 x i8> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <16 x i8> @llvm.loongarch.lsx.vinsgr2vr.b(<16 x i8> %va, i32 1, i32 %b)
+  ret <16 x i8> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vinsgr2vr.h(<8 x i16>, i32, i32)
+
+define <8 x i16> @lsx_vinsgr2vr_h(<8 x i16> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vinsgr2vr.h(<8 x i16> %va, i32 1, i32 %b)
+  ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vinsgr2vr.w(<4 x i32>, i32, i32)
+
+define <4 x i32> @lsx_vinsgr2vr_w(<4 x i32> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vinsgr2vr.w(<4 x i32> %va, i32 1, i32 %b)
+  ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vinsgr2vr.d(<2 x i64>, i64, i32)
+
+define <2 x i64> @lsx_vinsgr2vr_d(<2 x i64> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vinsgr2vr.d(<2 x i64> %va, i64 1, i32 %b)
+  ret <2 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-insgr2vr.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-insgr2vr.ll
new file mode 100644
index 0000000000000000000000000000000000000000..61d2cbd2806646fb765dd685eff2676bc31f7002
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-insgr2vr.ll
@@ -0,0 +1,54 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+declare <16 x i8> @llvm.loongarch.lsx.vinsgr2vr.b(<16 x i8>, i32, i32)
+
+define <16 x i8> @lsx_vinsgr2vr_b(<16 x i8> %va) nounwind {
+; CHECK-LABEL: lsx_vinsgr2vr_b:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    ori $a0, $zero, 1
+; CHECK-NEXT:    vinsgr2vr.b $vr0, $a0, 15
+; CHECK-NEXT:    ret
+entry:
+  %res = call <16 x i8> @llvm.loongarch.lsx.vinsgr2vr.b(<16 x i8> %va, i32 1, i32 15)
+  ret <16 x i8> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vinsgr2vr.h(<8 x i16>, i32, i32)
+
+define <8 x i16> @lsx_vinsgr2vr_h(<8 x i16> %va) nounwind {
+; CHECK-LABEL: lsx_vinsgr2vr_h:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    ori $a0, $zero, 1
+; CHECK-NEXT:    vinsgr2vr.h $vr0, $a0, 7
+; CHECK-NEXT:    ret
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vinsgr2vr.h(<8 x i16> %va, i32 1, i32 7)
+  ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vinsgr2vr.w(<4 x i32>, i32, i32)
+
+define <4 x i32> @lsx_vinsgr2vr_w(<4 x i32> %va) nounwind {
+; CHECK-LABEL: lsx_vinsgr2vr_w:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    ori $a0, $zero, 1
+; CHECK-NEXT:    vinsgr2vr.w $vr0, $a0, 3
+; CHECK-NEXT:    ret
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vinsgr2vr.w(<4 x i32> %va, i32 1, i32 3)
+  ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vinsgr2vr.d(<2 x i64>, i64, i32)
+
+define <2 x i64> @lsx_vinsgr2vr_d(<2 x i64> %va) nounwind {
+; CHECK-LABEL: lsx_vinsgr2vr_d:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    ori $a0, $zero, 1
+; CHECK-NEXT:    vinsgr2vr.d $vr0, $a0, 1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vinsgr2vr.d(<2 x i64> %va, i64 1, i32 1)
+  ret <2 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ld-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ld-invalid-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..3aeb30ce66b4441d452aa875ea97870d00ab6a0d
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ld-invalid-imm.ll
@@ -0,0 +1,17 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s
+
+declare <16 x i8> @llvm.loongarch.lsx.vld(i8*, i32)
+
+define <16 x i8> @lsx_vld_lo(i8* %p) nounwind {
+; CHECK: llvm.loongarch.lsx.vld: argument out of range
+entry:
+  %res = call <16 x i8> @llvm.loongarch.lsx.vld(i8* %p, i32 -2049)
+  ret <16 x i8> %res
+}
+
+define <16 x i8> @lsx_vld_hi(i8* %p) nounwind {
+; CHECK: llvm.loongarch.lsx.vld: argument out of range
+entry:
+  %res = call <16 x i8> @llvm.loongarch.lsx.vld(i8* %p, i32 2048)
+  ret <16 x i8> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ld-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ld-non-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..db6a0318d87aefe1a2955ea91cf83a3742b2547b
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ld-non-imm.ll
@@ -0,0 +1,10 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s
+
+declare <16 x i8> @llvm.loongarch.lsx.vld(i8*, i32)
+
+define <16 x i8> @lsx_vld(i8* %p, i32 %a) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <16 x i8> @llvm.loongarch.lsx.vld(i8* %p, i32 %a)
+  ret <16 x i8> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ld.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ld.ll
new file mode 100644
index 0000000000000000000000000000000000000000..b9e2ff8088d834d0c9ff270ecbe355f8747106e0
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ld.ll
@@ -0,0 +1,26 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+declare <16 x i8> @llvm.loongarch.lsx.vld(i8*, i32)
+
+define <16 x i8> @lsx_vld(i8* %p) nounwind {
+; CHECK-LABEL: lsx_vld:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vld $vr0, $a0, 1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <16 x i8> @llvm.loongarch.lsx.vld(i8* %p, i32 1)
+  ret <16 x i8> %res
+}
+
+declare <16 x i8> @llvm.loongarch.lsx.vldx(i8*, i64)
+
+define <16 x i8> @lsx_vldx(i8* %p, i64 %b) nounwind {
+; CHECK-LABEL: lsx_vldx:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vldx $vr0, $a0, $a1
+; CHECK-NEXT:    ret
+entry:
+  %res = call <16 x i8> @llvm.loongarch.lsx.vldx(i8* %p, i64 %b)
+  ret <16 x i8> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldi-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldi-invalid-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..57f6f8e81d91c57313524356b9e8bd7ce52d0caa
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldi-invalid-imm.ll
@@ -0,0 +1,81 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s
+
+declare <2 x i64> @llvm.loongarch.lsx.vldi(i32)
+
+define <2 x i64> @lsx_vldi_lo() nounwind {
+; CHECK: llvm.loongarch.lsx.vldi: argument out of range
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vldi(i32 -4097)
+  ret <2 x i64> %res
+}
+
+define <2 x i64> @lsx_vldi_hi() nounwind {
+; CHECK: llvm.loongarch.lsx.vldi: argument out of range
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vldi(i32 4096)
+  ret <2 x i64> %res
+}
+
+declare <16 x i8> @llvm.loongarch.lsx.vrepli.b(i32)
+
+define <16 x i8> @lsx_vrepli_b_lo() nounwind {
+; CHECK: llvm.loongarch.lsx.vrepli.b: argument out of range
+entry:
+  %res = call <16 x i8> @llvm.loongarch.lsx.vrepli.b(i32 -513)
+  ret <16 x i8> %res
+}
+
+define <16 x i8> @lsx_vrepli_b_hi() nounwind {
+; CHECK: llvm.loongarch.lsx.vrepli.b: argument out of range
+entry:
+  %res = call <16 x i8> @llvm.loongarch.lsx.vrepli.b(i32 512)
+  ret <16 x i8> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vrepli.h(i32)
+
+define <8 x i16> @lsx_vrepli_h_lo() nounwind {
+; CHECK: llvm.loongarch.lsx.vrepli.h: argument out of range
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vrepli.h(i32 -513)
+  ret <8 x i16> %res
+}
+
+define <8 x i16> @lsx_vrepli_h_hi() nounwind {
+; CHECK: llvm.loongarch.lsx.vrepli.h: argument out of range
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vrepli.h(i32 512)
+  ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vrepli.w(i32)
+
+define <4 x i32> @lsx_vrepli_w_lo() nounwind {
+; CHECK: llvm.loongarch.lsx.vrepli.w: argument out of range
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vrepli.w(i32 -513)
+  ret <4 x i32> %res
+}
+
+define <4 x i32> @lsx_vrepli_w_hi() nounwind {
+; CHECK: llvm.loongarch.lsx.vrepli.w: argument out of range
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vrepli.w(i32 512)
+  ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vrepli.d(i32)
+
+define <2 x i64> @lsx_vrepli_d_lo() nounwind
{ +; CHECK: llvm.loongarch.lsx.vrepli.d: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vrepli.d(i32 -513) + ret <2 x i64> %res +} + +define <2 x i64> @lsx_vrepli_d_hi() nounwind { +; CHECK: llvm.loongarch.lsx.vrepli.d: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vrepli.d(i32 512) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldi-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldi-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..a8f8278f8097a223a257607c8489175226a43ffc --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldi-non-imm.ll @@ -0,0 +1,46 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <2 x i64> @llvm.loongarch.lsx.vldi(i32) + +define <2 x i64> @lsx_vldi(i32 %a) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vldi(i32 %a) + ret <2 x i64> %res +} + +declare <16 x i8> @llvm.loongarch.lsx.vrepli.b(i32) + +define <16 x i8> @lsx_vrepli_b(i32 %a) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vrepli.b(i32 %a) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vrepli.h(i32) + +define <8 x i16> @lsx_vrepli_h(i32 %a) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vrepli.h(i32 %a) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vrepli.w(i32) + +define <4 x i32> @lsx_vrepli_w(i32 %a) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vrepli.w(i32 %a) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vrepli.d(i32) + +define <2 x i64> @lsx_vrepli_d(i32 %a) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vrepli.d(i32 %a) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldi.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldi.ll new file mode 100644 index 0000000000000000000000000000000000000000..ace910b54d9a6b4c0821430422f08e46b150a47c --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldi.ll @@ -0,0 +1,62 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <2 x i64> @llvm.loongarch.lsx.vldi(i32) + +define <2 x i64> @lsx_vldi() nounwind { +; CHECK-LABEL: lsx_vldi: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vldi $vr0, 4095 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vldi(i32 4095) + ret <2 x i64> %res +} + +declare <16 x i8> @llvm.loongarch.lsx.vrepli.b(i32) + +define <16 x i8> @lsx_vrepli_b() nounwind { +; CHECK-LABEL: lsx_vrepli_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vrepli.b $vr0, 511 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vrepli.b(i32 511) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vrepli.h(i32) + +define <8 x i16> @lsx_vrepli_h() nounwind { +; CHECK-LABEL: lsx_vrepli_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vrepli.h $vr0, 511 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vrepli.h(i32 511) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vrepli.w(i32) + +define <4 x i32> @lsx_vrepli_w() nounwind { +; CHECK-LABEL: lsx_vrepli_w: +; CHECK: # %bb.0: # %entry +; 
CHECK-NEXT: vrepli.w $vr0, 511 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vrepli.w(i32 511) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vrepli.d(i32) + +define <2 x i64> @lsx_vrepli_d() nounwind { +; CHECK-LABEL: lsx_vrepli_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vrepli.d $vr0, 511 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vrepli.d(i32 511) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldrepl-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldrepl-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..cb640e1245daa722261390f30b89df5cc69012b9 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldrepl-invalid-imm.ll @@ -0,0 +1,65 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vldrepl.b(i8*, i32) + +define <16 x i8> @lsx_vldrepl_b_lo(i8* %p) nounwind { +; CHECK: llvm.loongarch.lsx.vldrepl.b: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vldrepl.b(i8* %p, i32 -2049) + ret <16 x i8> %res +} + +define <16 x i8> @lsx_vldrepl_b_hi(i8* %p) nounwind { +; CHECK: llvm.loongarch.lsx.vldrepl.b: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vldrepl.b(i8* %p, i32 2048) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vldrepl.h(i8*, i32) + +define <8 x i16> @lsx_vldrepl_h_lo(i8* %p) nounwind { +; CHECK: llvm.loongarch.lsx.vldrepl.h: argument out of range or not a multiple of 2. +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vldrepl.h(i8* %p, i32 -2050) + ret <8 x i16> %res +} + +define <8 x i16> @lsx_vldrepl_h_hi(i8* %p) nounwind { +; CHECK: llvm.loongarch.lsx.vldrepl.h: argument out of range or not a multiple of 2. +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vldrepl.h(i8* %p, i32 2048) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vldrepl.w(i8*, i32) + +define <4 x i32> @lsx_vldrepl_w_lo(i8* %p) nounwind { +; CHECK: llvm.loongarch.lsx.vldrepl.w: argument out of range or not a multiple of 4. +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vldrepl.w(i8* %p, i32 -2052) + ret <4 x i32> %res +} + +define <4 x i32> @lsx_vldrepl_w_hi(i8* %p) nounwind { +; CHECK: llvm.loongarch.lsx.vldrepl.w: argument out of range or not a multiple of 4. +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vldrepl.w(i8* %p, i32 2048) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vldrepl.d(i8*, i32) + +define <2 x i64> @lsx_vldrepl_d_lo(i8* %p) nounwind { +; CHECK: llvm.loongarch.lsx.vldrepl.d: argument out of range or not a multiple of 8. +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vldrepl.d(i8* %p, i32 -2056) + ret <2 x i64> %res +} + +define <2 x i64> @lsx_vldrepl_d_hi(i8* %p) nounwind { +; CHECK: llvm.loongarch.lsx.vldrepl.d: argument out of range or not a multiple of 8. 
+entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vldrepl.d(i8* %p, i32 2048) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldrepl-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldrepl-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..e60b21913c6995d4d4c07c18d3390933f8366b34 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldrepl-non-imm.ll @@ -0,0 +1,37 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vldrepl.b(i8*, i32) + +define <16 x i8> @lsx_vldrepl_b(i8* %p, i32 %a) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vldrepl.b(i8* %p, i32 %a) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vldrepl.h(i8*, i32) + +define <8 x i16> @lsx_vldrepl_h(i8* %p, i32 %a) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vldrepl.h(i8* %p, i32 %a) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vldrepl.w(i8*, i32) + +define <4 x i32> @lsx_vldrepl_w(i8* %p, i32 %a) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vldrepl.w(i8* %p, i32 %a) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vldrepl.d(i8*, i32) + +define <2 x i64> @lsx_vldrepl_d(i8* %p, i32 %a) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vldrepl.d(i8* %p, i32 %a) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldrepl.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldrepl.ll new file mode 100644 index 0000000000000000000000000000000000000000..1a9cf3d3a7665d8f8be9c5d0546bdbe2afc00b52 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ldrepl.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vldrepl.b(i8*, i32) + +define <16 x i8> @lsx_vldrepl_b(i8* %p, i32 %b) nounwind { +; CHECK-LABEL: lsx_vldrepl_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vldrepl.b $vr0, $a0, 1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vldrepl.b(i8* %p, i32 1) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vldrepl.h(i8*, i32) + +define <8 x i16> @lsx_vldrepl_h(i8* %p, i32 %b) nounwind { +; CHECK-LABEL: lsx_vldrepl_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vldrepl.h $vr0, $a0, 2 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vldrepl.h(i8* %p, i32 2) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vldrepl.w(i8*, i32) + +define <4 x i32> @lsx_vldrepl_w(i8* %p, i32 %b) nounwind { +; CHECK-LABEL: lsx_vldrepl_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vldrepl.w $vr0, $a0, 4 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vldrepl.w(i8* %p, i32 4) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vldrepl.d(i8*, i32) + +define <2 x i64> @lsx_vldrepl_d(i8* %p, i32 %b) nounwind { +; CHECK-LABEL: lsx_vldrepl_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vldrepl.d $vr0, $a0, 8 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vldrepl.d(i8* %p, i32 8) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-madd.ll 
b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-madd.ll new file mode 100644 index 0000000000000000000000000000000000000000..89503724fd730ebd078eeeb936ee3cf5e82542bf --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-madd.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vmadd.b(<16 x i8>, <16 x i8>, <16 x i8>) + +define <16 x i8> @lsx_vmadd_b(<16 x i8> %va, <16 x i8> %vb, <16 x i8> %vc) nounwind { +; CHECK-LABEL: lsx_vmadd_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmadd.b $vr0, $vr1, $vr2 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vmadd.b(<16 x i8> %va, <16 x i8> %vb, <16 x i8> %vc) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vmadd.h(<8 x i16>, <8 x i16>, <8 x i16>) + +define <8 x i16> @lsx_vmadd_h(<8 x i16> %va, <8 x i16> %vb, <8 x i16> %vc) nounwind { +; CHECK-LABEL: lsx_vmadd_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmadd.h $vr0, $vr1, $vr2 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vmadd.h(<8 x i16> %va, <8 x i16> %vb, <8 x i16> %vc) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vmadd.w(<4 x i32>, <4 x i32>, <4 x i32>) + +define <4 x i32> @lsx_vmadd_w(<4 x i32> %va, <4 x i32> %vb, <4 x i32> %vc) nounwind { +; CHECK-LABEL: lsx_vmadd_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmadd.w $vr0, $vr1, $vr2 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vmadd.w(<4 x i32> %va, <4 x i32> %vb, <4 x i32> %vc) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vmadd.d(<2 x i64>, <2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vmadd_d(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) nounwind { +; CHECK-LABEL: lsx_vmadd_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmadd.d $vr0, $vr1, $vr2 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vmadd.d(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-maddw.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-maddw.ll new file mode 100644 index 0000000000000000000000000000000000000000..1e3ab25a5fcf1ac4aa891616ec4615284c544a38 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-maddw.ll @@ -0,0 +1,290 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.b(<8 x i16>, <16 x i8>, <16 x i8>) + +define <8 x i16> @lsx_vmaddwev_h_b(<8 x i16> %va, <16 x i8> %vb, <16 x i8> %vc) nounwind { +; CHECK-LABEL: lsx_vmaddwev_h_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmaddwev.h.b $vr0, $vr1, $vr2 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.b(<8 x i16> %va, <16 x i8> %vb, <16 x i8> %vc) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.h(<4 x i32>, <8 x i16>, <8 x i16>) + +define <4 x i32> @lsx_vmaddwev_w_h(<4 x i32> %va, <8 x i16> %vb, <8 x i16> %vc) nounwind { +; CHECK-LABEL: lsx_vmaddwev_w_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmaddwev.w.h $vr0, $vr1, $vr2 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.h(<4 x i32> %va, <8 x i16> %vb, <8 x i16> %vc) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.w(<2 x i64>, <4 x i32>, <4 x i32>) + +define <2 x i64> @lsx_vmaddwev_d_w(<2 x i64> %va, <4 x i32> 
%vb, <4 x i32> %vc) nounwind { +; CHECK-LABEL: lsx_vmaddwev_d_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmaddwev.d.w $vr0, $vr1, $vr2 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.w(<2 x i64> %va, <4 x i32> %vb, <4 x i32> %vc) + ret <2 x i64> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.d(<2 x i64>, <2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vmaddwev_q_d(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) nounwind { +; CHECK-LABEL: lsx_vmaddwev_q_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmaddwev.q.d $vr0, $vr1, $vr2 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.d(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) + ret <2 x i64> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu(<8 x i16>, <16 x i8>, <16 x i8>) + +define <8 x i16> @lsx_vmaddwev_h_bu(<8 x i16> %va, <16 x i8> %vb, <16 x i8> %vc) nounwind { +; CHECK-LABEL: lsx_vmaddwev_h_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmaddwev.h.bu $vr0, $vr1, $vr2 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu(<8 x i16> %va, <16 x i8> %vb, <16 x i8> %vc) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu(<4 x i32>, <8 x i16>, <8 x i16>) + +define <4 x i32> @lsx_vmaddwev_w_hu(<4 x i32> %va, <8 x i16> %vb, <8 x i16> %vc) nounwind { +; CHECK-LABEL: lsx_vmaddwev_w_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmaddwev.w.hu $vr0, $vr1, $vr2 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu(<4 x i32> %va, <8 x i16> %vb, <8 x i16> %vc) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu(<2 x i64>, <4 x i32>, <4 x i32>) + +define <2 x i64> @lsx_vmaddwev_d_wu(<2 x i64> %va, <4 x i32> %vb, <4 x i32> %vc) nounwind { +; CHECK-LABEL: lsx_vmaddwev_d_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmaddwev.d.wu $vr0, $vr1, $vr2 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu(<2 x i64> %va, <4 x i32> %vb, <4 x i32> %vc) + ret <2 x i64> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du(<2 x i64>, <2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vmaddwev_q_du(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) nounwind { +; CHECK-LABEL: lsx_vmaddwev_q_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmaddwev.q.du $vr0, $vr1, $vr2 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) + ret <2 x i64> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu.b(<8 x i16>, <16 x i8>, <16 x i8>) + +define <8 x i16> @lsx_vmaddwev_h_bu_b(<8 x i16> %va, <16 x i8> %vb, <16 x i8> %vc) nounwind { +; CHECK-LABEL: lsx_vmaddwev_h_bu_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmaddwev.h.bu.b $vr0, $vr1, $vr2 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu.b(<8 x i16> %va, <16 x i8> %vb, <16 x i8> %vc) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu.h(<4 x i32>, <8 x i16>, <8 x i16>) + +define <4 x i32> @lsx_vmaddwev_w_hu_h(<4 x i32> %va, <8 x i16> %vb, <8 x i16> %vc) nounwind { +; CHECK-LABEL: lsx_vmaddwev_w_hu_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmaddwev.w.hu.h $vr0, $vr1, $vr2 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu.h(<4 x i32> %va, <8 x i16> %vb, <8 x i16> %vc) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu.w(<2 x i64>, <4 x i32>, <4 x i32>) + +define <2 x i64> 
@lsx_vmaddwev_d_wu_w(<2 x i64> %va, <4 x i32> %vb, <4 x i32> %vc) nounwind { +; CHECK-LABEL: lsx_vmaddwev_d_wu_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmaddwev.d.wu.w $vr0, $vr1, $vr2 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu.w(<2 x i64> %va, <4 x i32> %vb, <4 x i32> %vc) + ret <2 x i64> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du.d(<2 x i64>, <2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vmaddwev_q_du_d(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) nounwind { +; CHECK-LABEL: lsx_vmaddwev_q_du_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmaddwev.q.du.d $vr0, $vr1, $vr2 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du.d(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) + ret <2 x i64> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.b(<8 x i16>, <16 x i8>, <16 x i8>) + +define <8 x i16> @lsx_vmaddwod_h_b(<8 x i16> %va, <16 x i8> %vb, <16 x i8> %vc) nounwind { +; CHECK-LABEL: lsx_vmaddwod_h_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmaddwod.h.b $vr0, $vr1, $vr2 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.b(<8 x i16> %va, <16 x i8> %vb, <16 x i8> %vc) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.h(<4 x i32>, <8 x i16>, <8 x i16>) + +define <4 x i32> @lsx_vmaddwod_w_h(<4 x i32> %va, <8 x i16> %vb, <8 x i16> %vc) nounwind { +; CHECK-LABEL: lsx_vmaddwod_w_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmaddwod.w.h $vr0, $vr1, $vr2 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.h(<4 x i32> %va, <8 x i16> %vb, <8 x i16> %vc) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.w(<2 x i64>, <4 x i32>, <4 x i32>) + +define <2 x i64> @lsx_vmaddwod_d_w(<2 x i64> %va, <4 x i32> %vb, <4 x i32> %vc) nounwind { +; CHECK-LABEL: lsx_vmaddwod_d_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmaddwod.d.w $vr0, $vr1, $vr2 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.w(<2 x i64> %va, <4 x i32> %vb, <4 x i32> %vc) + ret <2 x i64> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.d(<2 x i64>, <2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vmaddwod_q_d(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) nounwind { +; CHECK-LABEL: lsx_vmaddwod_q_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmaddwod.q.d $vr0, $vr1, $vr2 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.d(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) + ret <2 x i64> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu(<8 x i16>, <16 x i8>, <16 x i8>) + +define <8 x i16> @lsx_vmaddwod_h_bu(<8 x i16> %va, <16 x i8> %vb, <16 x i8> %vc) nounwind { +; CHECK-LABEL: lsx_vmaddwod_h_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmaddwod.h.bu $vr0, $vr1, $vr2 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu(<8 x i16> %va, <16 x i8> %vb, <16 x i8> %vc) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu(<4 x i32>, <8 x i16>, <8 x i16>) + +define <4 x i32> @lsx_vmaddwod_w_hu(<4 x i32> %va, <8 x i16> %vb, <8 x i16> %vc) nounwind { +; CHECK-LABEL: lsx_vmaddwod_w_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmaddwod.w.hu $vr0, $vr1, $vr2 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu(<4 x i32> %va, <8 x i16> %vb, <8 x i16> %vc) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu(<2 x i64>, <4 x i32>, <4 x i32>) + 
+define <2 x i64> @lsx_vmaddwod_d_wu(<2 x i64> %va, <4 x i32> %vb, <4 x i32> %vc) nounwind { +; CHECK-LABEL: lsx_vmaddwod_d_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmaddwod.d.wu $vr0, $vr1, $vr2 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu(<2 x i64> %va, <4 x i32> %vb, <4 x i32> %vc) + ret <2 x i64> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du(<2 x i64>, <2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vmaddwod_q_du(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) nounwind { +; CHECK-LABEL: lsx_vmaddwod_q_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmaddwod.q.du $vr0, $vr1, $vr2 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) + ret <2 x i64> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu.b(<8 x i16>, <16 x i8>, <16 x i8>) + +define <8 x i16> @lsx_vmaddwod_h_bu_b(<8 x i16> %va, <16 x i8> %vb, <16 x i8> %vc) nounwind { +; CHECK-LABEL: lsx_vmaddwod_h_bu_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmaddwod.h.bu.b $vr0, $vr1, $vr2 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu.b(<8 x i16> %va, <16 x i8> %vb, <16 x i8> %vc) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu.h(<4 x i32>, <8 x i16>, <8 x i16>) + +define <4 x i32> @lsx_vmaddwod_w_hu_h(<4 x i32> %va, <8 x i16> %vb, <8 x i16> %vc) nounwind { +; CHECK-LABEL: lsx_vmaddwod_w_hu_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmaddwod.w.hu.h $vr0, $vr1, $vr2 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu.h(<4 x i32> %va, <8 x i16> %vb, <8 x i16> %vc) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu.w(<2 x i64>, <4 x i32>, <4 x i32>) + +define <2 x i64> @lsx_vmaddwod_d_wu_w(<2 x i64> %va, <4 x i32> %vb, <4 x i32> %vc) nounwind { +; CHECK-LABEL: lsx_vmaddwod_d_wu_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmaddwod.d.wu.w $vr0, $vr1, $vr2 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu.w(<2 x i64> %va, <4 x i32> %vb, <4 x i32> %vc) + ret <2 x i64> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du.d(<2 x i64>, <2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vmaddwod_q_du_d(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) nounwind { +; CHECK-LABEL: lsx_vmaddwod_q_du_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmaddwod.q.du.d $vr0, $vr1, $vr2 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du.d(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-max-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-max-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..667ba32723fc4f03bf7401b417b4012351576e1a --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-max-invalid-imm.ll @@ -0,0 +1,129 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vmaxi.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vmaxi_b_lo(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vmaxi.b: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vmaxi.b(<16 x i8> %va, i32 -17) + ret <16 x i8> %res +} + +define <16 x i8> @lsx_vmaxi_b_hi(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vmaxi.b: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vmaxi.b(<16 x i8> %va, i32 16) + ret 
<16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vmaxi.h(<8 x i16>, i32) + +define <8 x i16> @lsx_vmaxi_h_lo(<8 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vmaxi.h: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vmaxi.h(<8 x i16> %va, i32 -17) + ret <8 x i16> %res +} + +define <8 x i16> @lsx_vmaxi_h_hi(<8 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vmaxi.h: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vmaxi.h(<8 x i16> %va, i32 16) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vmaxi.w(<4 x i32>, i32) + +define <4 x i32> @lsx_vmaxi_w_lo(<4 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vmaxi.w: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vmaxi.w(<4 x i32> %va, i32 -17) + ret <4 x i32> %res +} + +define <4 x i32> @lsx_vmaxi_w_hi(<4 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vmaxi.w: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vmaxi.w(<4 x i32> %va, i32 16) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vmaxi.d(<2 x i64>, i32) + +define <2 x i64> @lsx_vmaxi_d_lo(<2 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vmaxi.d: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vmaxi.d(<2 x i64> %va, i32 -17) + ret <2 x i64> %res +} + +define <2 x i64> @lsx_vmaxi_d_hi(<2 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vmaxi.d: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vmaxi.d(<2 x i64> %va, i32 16) + ret <2 x i64> %res +} + +declare <16 x i8> @llvm.loongarch.lsx.vmaxi.bu(<16 x i8>, i32) + +define <16 x i8> @lsx_vmaxi_bu_lo(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vmaxi.bu: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vmaxi.bu(<16 x i8> %va, i32 -1) + ret <16 x i8> %res +} + +define <16 x i8> @lsx_vmaxi_bu_hi(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vmaxi.bu: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vmaxi.bu(<16 x i8> %va, i32 32) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vmaxi.hu(<8 x i16>, i32) + +define <8 x i16> @lsx_vmaxi_hu_lo(<8 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vmaxi.hu: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vmaxi.hu(<8 x i16> %va, i32 -1) + ret <8 x i16> %res +} + +define <8 x i16> @lsx_vmaxi_hu_hi(<8 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vmaxi.hu: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vmaxi.hu(<8 x i16> %va, i32 32) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vmaxi.wu(<4 x i32>, i32) + +define <4 x i32> @lsx_vmaxi_wu_lo(<4 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vmaxi.wu: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vmaxi.wu(<4 x i32> %va, i32 -1) + ret <4 x i32> %res +} + +define <4 x i32> @lsx_vmaxi_wu_hi(<4 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vmaxi.wu: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vmaxi.wu(<4 x i32> %va, i32 32) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vmaxi.du(<2 x i64>, i32) + +define <2 x i64> @lsx_vmaxi_du_lo(<2 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vmaxi.du: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vmaxi.du(<2 x i64> %va, i32 -1) + ret <2 x i64> %res +} + +define <2 x i64> @lsx_vmaxi_du_hi(<2 x i64> %va) nounwind { +; CHECK: 
llvm.loongarch.lsx.vmaxi.du: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vmaxi.du(<2 x i64> %va, i32 32) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-max-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-max-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..34bbe3495670745b7632b7f5ff41427f38355ab0 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-max-non-imm.ll @@ -0,0 +1,73 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vmaxi.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vmaxi_b(<16 x i8> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vmaxi.b(<16 x i8> %va, i32 %b) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vmaxi.h(<8 x i16>, i32) + +define <8 x i16> @lsx_vmaxi_h(<8 x i16> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vmaxi.h(<8 x i16> %va, i32 %b) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vmaxi.w(<4 x i32>, i32) + +define <4 x i32> @lsx_vmaxi_w(<4 x i32> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vmaxi.w(<4 x i32> %va, i32 %b) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vmaxi.d(<2 x i64>, i32) + +define <2 x i64> @lsx_vmaxi_d(<2 x i64> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vmaxi.d(<2 x i64> %va, i32 %b) + ret <2 x i64> %res +} + +declare <16 x i8> @llvm.loongarch.lsx.vmaxi.bu(<16 x i8>, i32) + +define <16 x i8> @lsx_vmaxi_bu(<16 x i8> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vmaxi.bu(<16 x i8> %va, i32 %b) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vmaxi.hu(<8 x i16>, i32) + +define <8 x i16> @lsx_vmaxi_hu(<8 x i16> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vmaxi.hu(<8 x i16> %va, i32 %b) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vmaxi.wu(<4 x i32>, i32) + +define <4 x i32> @lsx_vmaxi_wu(<4 x i32> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vmaxi.wu(<4 x i32> %va, i32 %b) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vmaxi.du(<2 x i64>, i32) + +define <2 x i64> @lsx_vmaxi_du(<2 x i64> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vmaxi.du(<2 x i64> %va, i32 %b) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-max.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-max.ll new file mode 100644 index 0000000000000000000000000000000000000000..4dd289cf6ed7202981893352c7d49d4b2cd775d1 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-max.ll @@ -0,0 +1,194 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vmax.b(<16 x i8>, <16 x i8>) + +define <16 x i8> @lsx_vmax_b(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vmax_b: +; CHECK: # 
%bb.0: # %entry +; CHECK-NEXT: vmax.b $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vmax.b(<16 x i8> %va, <16 x i8> %vb) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vmax.h(<8 x i16>, <8 x i16>) + +define <8 x i16> @lsx_vmax_h(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vmax_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmax.h $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vmax.h(<8 x i16> %va, <8 x i16> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vmax.w(<4 x i32>, <4 x i32>) + +define <4 x i32> @lsx_vmax_w(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vmax_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmax.w $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vmax.w(<4 x i32> %va, <4 x i32> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vmax.d(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vmax_d(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vmax_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmax.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vmax.d(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} + +declare <16 x i8> @llvm.loongarch.lsx.vmaxi.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vmaxi_b(<16 x i8> %va) nounwind { +; CHECK-LABEL: lsx_vmaxi_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmaxi.b $vr0, $vr0, -16 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vmaxi.b(<16 x i8> %va, i32 -16) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vmaxi.h(<8 x i16>, i32) + +define <8 x i16> @lsx_vmaxi_h(<8 x i16> %va) nounwind { +; CHECK-LABEL: lsx_vmaxi_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmaxi.h $vr0, $vr0, -16 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vmaxi.h(<8 x i16> %va, i32 -16) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vmaxi.w(<4 x i32>, i32) + +define <4 x i32> @lsx_vmaxi_w(<4 x i32> %va) nounwind { +; CHECK-LABEL: lsx_vmaxi_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmaxi.w $vr0, $vr0, 15 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vmaxi.w(<4 x i32> %va, i32 15) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vmaxi.d(<2 x i64>, i32) + +define <2 x i64> @lsx_vmaxi_d(<2 x i64> %va) nounwind { +; CHECK-LABEL: lsx_vmaxi_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmaxi.d $vr0, $vr0, 15 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vmaxi.d(<2 x i64> %va, i32 15) + ret <2 x i64> %res +} + +declare <16 x i8> @llvm.loongarch.lsx.vmax.bu(<16 x i8>, <16 x i8>) + +define <16 x i8> @lsx_vmax_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vmax_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmax.bu $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vmax.bu(<16 x i8> %va, <16 x i8> %vb) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vmax.hu(<8 x i16>, <8 x i16>) + +define <8 x i16> @lsx_vmax_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vmax_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmax.hu $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vmax.hu(<8 x i16> %va, <8 x i16> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vmax.wu(<4 x i32>, <4 x i32>) + +define <4 x i32> @lsx_vmax_wu(<4 x i32> %va, <4 x 
i32> %vb) nounwind { +; CHECK-LABEL: lsx_vmax_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmax.wu $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vmax.wu(<4 x i32> %va, <4 x i32> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vmax.du(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vmax_du(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vmax_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmax.du $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vmax.du(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} + +declare <16 x i8> @llvm.loongarch.lsx.vmaxi.bu(<16 x i8>, i32) + +define <16 x i8> @lsx_vmaxi_bu(<16 x i8> %va) nounwind { +; CHECK-LABEL: lsx_vmaxi_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmaxi.bu $vr0, $vr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vmaxi.bu(<16 x i8> %va, i32 1) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vmaxi.hu(<8 x i16>, i32) + +define <8 x i16> @lsx_vmaxi_hu(<8 x i16> %va) nounwind { +; CHECK-LABEL: lsx_vmaxi_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmaxi.hu $vr0, $vr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vmaxi.hu(<8 x i16> %va, i32 1) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vmaxi.wu(<4 x i32>, i32) + +define <4 x i32> @lsx_vmaxi_wu(<4 x i32> %va) nounwind { +; CHECK-LABEL: lsx_vmaxi_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmaxi.wu $vr0, $vr0, 31 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vmaxi.wu(<4 x i32> %va, i32 31) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vmaxi.du(<2 x i64>, i32) + +define <2 x i64> @lsx_vmaxi_du(<2 x i64> %va) nounwind { +; CHECK-LABEL: lsx_vmaxi_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmaxi.du $vr0, $vr0, 31 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vmaxi.du(<2 x i64> %va, i32 31) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-min-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-min-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..b73bada4f06fb617d2699b819aaec001d5b76704 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-min-invalid-imm.ll @@ -0,0 +1,129 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vmini.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vmini_b_lo(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vmini.b: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vmini.b(<16 x i8> %va, i32 -17) + ret <16 x i8> %res +} + +define <16 x i8> @lsx_vmini_b_hi(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vmini.b: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vmini.b(<16 x i8> %va, i32 16) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vmini.h(<8 x i16>, i32) + +define <8 x i16> @lsx_vmini_h_lo(<8 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vmini.h: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vmini.h(<8 x i16> %va, i32 -17) + ret <8 x i16> %res +} + +define <8 x i16> @lsx_vmini_h_hi(<8 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vmini.h: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vmini.h(<8 x i16> %va, i32 16) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vmini.w(<4 x i32>, 
i32) + +define <4 x i32> @lsx_vmini_w_lo(<4 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vmini.w: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vmini.w(<4 x i32> %va, i32 -17) + ret <4 x i32> %res +} + +define <4 x i32> @lsx_vmini_w_hi(<4 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vmini.w: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vmini.w(<4 x i32> %va, i32 16) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vmini.d(<2 x i64>, i32) + +define <2 x i64> @lsx_vmini_d_lo(<2 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vmini.d: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vmini.d(<2 x i64> %va, i32 -17) + ret <2 x i64> %res +} + +define <2 x i64> @lsx_vmini_d_hi(<2 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vmini.d: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vmini.d(<2 x i64> %va, i32 16) + ret <2 x i64> %res +} + +declare <16 x i8> @llvm.loongarch.lsx.vmini.bu(<16 x i8>, i32) + +define <16 x i8> @lsx_vmini_bu_lo(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vmini.bu: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vmini.bu(<16 x i8> %va, i32 -1) + ret <16 x i8> %res +} + +define <16 x i8> @lsx_vmini_bu_hi(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vmini.bu: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vmini.bu(<16 x i8> %va, i32 32) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vmini.hu(<8 x i16>, i32) + +define <8 x i16> @lsx_vmini_hu_lo(<8 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vmini.hu: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vmini.hu(<8 x i16> %va, i32 -1) + ret <8 x i16> %res +} + +define <8 x i16> @lsx_vmini_hu_hi(<8 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vmini.hu: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vmini.hu(<8 x i16> %va, i32 32) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vmini.wu(<4 x i32>, i32) + +define <4 x i32> @lsx_vmini_wu_lo(<4 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vmini.wu: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vmini.wu(<4 x i32> %va, i32 -1) + ret <4 x i32> %res +} + +define <4 x i32> @lsx_vmini_wu_hi(<4 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vmini.wu: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vmini.wu(<4 x i32> %va, i32 32) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vmini.du(<2 x i64>, i32) + +define <2 x i64> @lsx_vmini_du_lo(<2 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vmini.du: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vmini.du(<2 x i64> %va, i32 -1) + ret <2 x i64> %res +} + +define <2 x i64> @lsx_vmini_du_hi(<2 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vmini.du: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vmini.du(<2 x i64> %va, i32 32) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-min-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-min-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..5d9b98cec4d0e9a8ed817aec0c19f8d5cf25daf1 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-min-non-imm.ll @@ -0,0 +1,73 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> 
@llvm.loongarch.lsx.vmini.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vmini_b(<16 x i8> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vmini.b(<16 x i8> %va, i32 %b) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vmini.h(<8 x i16>, i32) + +define <8 x i16> @lsx_vmini_h(<8 x i16> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vmini.h(<8 x i16> %va, i32 %b) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vmini.w(<4 x i32>, i32) + +define <4 x i32> @lsx_vmini_w(<4 x i32> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vmini.w(<4 x i32> %va, i32 %b) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vmini.d(<2 x i64>, i32) + +define <2 x i64> @lsx_vmini_d(<2 x i64> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vmini.d(<2 x i64> %va, i32 %b) + ret <2 x i64> %res +} + +declare <16 x i8> @llvm.loongarch.lsx.vmini.bu(<16 x i8>, i32) + +define <16 x i8> @lsx_vmini_bu(<16 x i8> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vmini.bu(<16 x i8> %va, i32 %b) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vmini.hu(<8 x i16>, i32) + +define <8 x i16> @lsx_vmini_hu(<8 x i16> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vmini.hu(<8 x i16> %va, i32 %b) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vmini.wu(<4 x i32>, i32) + +define <4 x i32> @lsx_vmini_wu(<4 x i32> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vmini.wu(<4 x i32> %va, i32 %b) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vmini.du(<2 x i64>, i32) + +define <2 x i64> @lsx_vmini_du(<2 x i64> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vmini.du(<2 x i64> %va, i32 %b) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-min.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-min.ll new file mode 100644 index 0000000000000000000000000000000000000000..aa12a5ead6a3f7416819a4cb4b688e1b6b1b7c04 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-min.ll @@ -0,0 +1,194 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vmin.b(<16 x i8>, <16 x i8>) + +define <16 x i8> @lsx_vmin_b(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vmin_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmin.b $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vmin.b(<16 x i8> %va, <16 x i8> %vb) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vmin.h(<8 x i16>, <8 x i16>) + +define <8 x i16> @lsx_vmin_h(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vmin_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmin.h $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vmin.h(<8 x i16> %va, <8 x i16> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> 
@llvm.loongarch.lsx.vmin.w(<4 x i32>, <4 x i32>) + +define <4 x i32> @lsx_vmin_w(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vmin_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmin.w $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vmin.w(<4 x i32> %va, <4 x i32> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vmin.d(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vmin_d(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vmin_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmin.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vmin.d(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} + +declare <16 x i8> @llvm.loongarch.lsx.vmini.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vmini_b(<16 x i8> %va) nounwind { +; CHECK-LABEL: lsx_vmini_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmini.b $vr0, $vr0, 15 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vmini.b(<16 x i8> %va, i32 15) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vmini.h(<8 x i16>, i32) + +define <8 x i16> @lsx_vmini_h(<8 x i16> %va) nounwind { +; CHECK-LABEL: lsx_vmini_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmini.h $vr0, $vr0, 15 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vmini.h(<8 x i16> %va, i32 15) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vmini.w(<4 x i32>, i32) + +define <4 x i32> @lsx_vmini_w(<4 x i32> %va) nounwind { +; CHECK-LABEL: lsx_vmini_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmini.w $vr0, $vr0, -16 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vmini.w(<4 x i32> %va, i32 -16) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vmini.d(<2 x i64>, i32) + +define <2 x i64> @lsx_vmini_d(<2 x i64> %va) nounwind { +; CHECK-LABEL: lsx_vmini_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmini.d $vr0, $vr0, -16 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vmini.d(<2 x i64> %va, i32 -16) + ret <2 x i64> %res +} + +declare <16 x i8> @llvm.loongarch.lsx.vmin.bu(<16 x i8>, <16 x i8>) + +define <16 x i8> @lsx_vmin_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vmin_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmin.bu $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vmin.bu(<16 x i8> %va, <16 x i8> %vb) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vmin.hu(<8 x i16>, <8 x i16>) + +define <8 x i16> @lsx_vmin_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vmin_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmin.hu $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vmin.hu(<8 x i16> %va, <8 x i16> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vmin.wu(<4 x i32>, <4 x i32>) + +define <4 x i32> @lsx_vmin_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vmin_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmin.wu $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vmin.wu(<4 x i32> %va, <4 x i32> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vmin.du(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vmin_du(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vmin_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmin.du $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vmin.du(<2 x 
i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} + +declare <16 x i8> @llvm.loongarch.lsx.vmini.bu(<16 x i8>, i32) + +define <16 x i8> @lsx_vmini_bu(<16 x i8> %va) nounwind { +; CHECK-LABEL: lsx_vmini_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmini.bu $vr0, $vr0, 31 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vmini.bu(<16 x i8> %va, i32 31) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vmini.hu(<8 x i16>, i32) + +define <8 x i16> @lsx_vmini_hu(<8 x i16> %va) nounwind { +; CHECK-LABEL: lsx_vmini_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmini.hu $vr0, $vr0, 31 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vmini.hu(<8 x i16> %va, i32 31) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vmini.wu(<4 x i32>, i32) + +define <4 x i32> @lsx_vmini_wu(<4 x i32> %va) nounwind { +; CHECK-LABEL: lsx_vmini_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmini.wu $vr0, $vr0, 31 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vmini.wu(<4 x i32> %va, i32 31) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vmini.du(<2 x i64>, i32) + +define <2 x i64> @lsx_vmini_du(<2 x i64> %va) nounwind { +; CHECK-LABEL: lsx_vmini_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmini.du $vr0, $vr0, 31 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vmini.du(<2 x i64> %va, i32 31) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mod.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mod.ll new file mode 100644 index 0000000000000000000000000000000000000000..6b3dc6865584e5964efb3ca63cbcf29f7601e0ee --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mod.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vmod.b(<16 x i8>, <16 x i8>) + +define <16 x i8> @lsx_vmod_b(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vmod_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmod.b $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vmod.b(<16 x i8> %va, <16 x i8> %vb) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vmod.h(<8 x i16>, <8 x i16>) + +define <8 x i16> @lsx_vmod_h(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vmod_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmod.h $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vmod.h(<8 x i16> %va, <8 x i16> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vmod.w(<4 x i32>, <4 x i32>) + +define <4 x i32> @lsx_vmod_w(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vmod_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmod.w $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vmod.w(<4 x i32> %va, <4 x i32> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vmod.d(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vmod_d(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vmod_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmod.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vmod.d(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} + +declare <16 x i8> @llvm.loongarch.lsx.vmod.bu(<16 x i8>, <16 x i8>) + +define <16 x i8> @lsx_vmod_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: 
lsx_vmod_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmod.bu $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vmod.bu(<16 x i8> %va, <16 x i8> %vb) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vmod.hu(<8 x i16>, <8 x i16>) + +define <8 x i16> @lsx_vmod_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vmod_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmod.hu $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vmod.hu(<8 x i16> %va, <8 x i16> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vmod.wu(<4 x i32>, <4 x i32>) + +define <4 x i32> @lsx_vmod_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vmod_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmod.wu $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vmod.wu(<4 x i32> %va, <4 x i32> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vmod.du(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vmod_du(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vmod_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmod.du $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vmod.du(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mskgez.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mskgez.ll new file mode 100644 index 0000000000000000000000000000000000000000..3ecd777aee67858a9e9671f442a64f398e821821 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mskgez.ll @@ -0,0 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vmskgez.b(<16 x i8>) + +define <16 x i8> @lsx_vmskgez_b(<16 x i8> %va) nounwind { +; CHECK-LABEL: lsx_vmskgez_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmskgez.b $vr0, $vr0 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vmskgez.b(<16 x i8> %va) + ret <16 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mskltz.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mskltz.ll new file mode 100644 index 0000000000000000000000000000000000000000..be00c76137c770079f69bb1d9c7695de9fa4c4e2 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mskltz.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vmskltz.b(<16 x i8>) + +define <16 x i8> @lsx_vmskltz_b(<16 x i8> %va) nounwind { +; CHECK-LABEL: lsx_vmskltz_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmskltz.b $vr0, $vr0 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vmskltz.b(<16 x i8> %va) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vmskltz.h(<8 x i16>) + +define <8 x i16> @lsx_vmskltz_h(<8 x i16> %va) nounwind { +; CHECK-LABEL: lsx_vmskltz_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmskltz.h $vr0, $vr0 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vmskltz.h(<8 x i16> %va) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vmskltz.w(<4 x i32>) + +define <4 x i32> @lsx_vmskltz_w(<4 x i32> %va) nounwind { +; CHECK-LABEL: lsx_vmskltz_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmskltz.w $vr0, $vr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> 
@llvm.loongarch.lsx.vmskltz.w(<4 x i32> %va) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vmskltz.d(<2 x i64>) + +define <2 x i64> @lsx_vmskltz_d(<2 x i64> %va) nounwind { +; CHECK-LABEL: lsx_vmskltz_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmskltz.d $vr0, $vr0 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vmskltz.d(<2 x i64> %va) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-msknz.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-msknz.ll new file mode 100644 index 0000000000000000000000000000000000000000..02f1752f7190ddfb04a706f99480eba50e70400b --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-msknz.ll @@ -0,0 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vmsknz.b(<16 x i8>) + +define <16 x i8> @lsx_vmsknz_b(<16 x i8> %va) nounwind { +; CHECK-LABEL: lsx_vmsknz_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmsknz.b $vr0, $vr0 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vmsknz.b(<16 x i8> %va) + ret <16 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-msub.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-msub.ll new file mode 100644 index 0000000000000000000000000000000000000000..98684e10c78e5b2f854348d2ef0c0347e8ac2c6b --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-msub.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vmsub.b(<16 x i8>, <16 x i8>, <16 x i8>) + +define <16 x i8> @lsx_vmsub_b(<16 x i8> %va, <16 x i8> %vb, <16 x i8> %vc) nounwind { +; CHECK-LABEL: lsx_vmsub_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmsub.b $vr0, $vr1, $vr2 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vmsub.b(<16 x i8> %va, <16 x i8> %vb, <16 x i8> %vc) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vmsub.h(<8 x i16>, <8 x i16>, <8 x i16>) + +define <8 x i16> @lsx_vmsub_h(<8 x i16> %va, <8 x i16> %vb, <8 x i16> %vc) nounwind { +; CHECK-LABEL: lsx_vmsub_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmsub.h $vr0, $vr1, $vr2 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vmsub.h(<8 x i16> %va, <8 x i16> %vb, <8 x i16> %vc) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vmsub.w(<4 x i32>, <4 x i32>, <4 x i32>) + +define <4 x i32> @lsx_vmsub_w(<4 x i32> %va, <4 x i32> %vb, <4 x i32> %vc) nounwind { +; CHECK-LABEL: lsx_vmsub_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmsub.w $vr0, $vr1, $vr2 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vmsub.w(<4 x i32> %va, <4 x i32> %vb, <4 x i32> %vc) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vmsub.d(<2 x i64>, <2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vmsub_d(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) nounwind { +; CHECK-LABEL: lsx_vmsub_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmsub.d $vr0, $vr1, $vr2 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vmsub.d(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-muh.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-muh.ll new file mode 100644 index 0000000000000000000000000000000000000000..a4deb8f8f823e555fe7c1a62d896a68af7d6a5ae --- 
/dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-muh.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vmuh.b(<16 x i8>, <16 x i8>) + +define <16 x i8> @lsx_vmuh_b(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vmuh_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmuh.b $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vmuh.b(<16 x i8> %va, <16 x i8> %vb) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vmuh.h(<8 x i16>, <8 x i16>) + +define <8 x i16> @lsx_vmuh_h(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vmuh_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmuh.h $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vmuh.h(<8 x i16> %va, <8 x i16> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vmuh.w(<4 x i32>, <4 x i32>) + +define <4 x i32> @lsx_vmuh_w(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vmuh_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmuh.w $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vmuh.w(<4 x i32> %va, <4 x i32> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vmuh.d(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vmuh_d(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vmuh_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmuh.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vmuh.d(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} + +declare <16 x i8> @llvm.loongarch.lsx.vmuh.bu(<16 x i8>, <16 x i8>) + +define <16 x i8> @lsx_vmuh_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vmuh_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmuh.bu $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vmuh.bu(<16 x i8> %va, <16 x i8> %vb) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vmuh.hu(<8 x i16>, <8 x i16>) + +define <8 x i16> @lsx_vmuh_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vmuh_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmuh.hu $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vmuh.hu(<8 x i16> %va, <8 x i16> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vmuh.wu(<4 x i32>, <4 x i32>) + +define <4 x i32> @lsx_vmuh_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vmuh_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmuh.wu $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vmuh.wu(<4 x i32> %va, <4 x i32> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vmuh.du(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vmuh_du(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vmuh_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmuh.du $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vmuh.du(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mul.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mul.ll new file mode 100644 index 0000000000000000000000000000000000000000..aca60d1663b7427de172fe86c3c006aca838b039 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mul.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been 
autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vmul.b(<16 x i8>, <16 x i8>) + +define <16 x i8> @lsx_vmul_b(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vmul_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmul.b $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vmul.b(<16 x i8> %va, <16 x i8> %vb) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vmul.h(<8 x i16>, <8 x i16>) + +define <8 x i16> @lsx_vmul_h(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vmul_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmul.h $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vmul.h(<8 x i16> %va, <8 x i16> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vmul.w(<4 x i32>, <4 x i32>) + +define <4 x i32> @lsx_vmul_w(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vmul_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmul.w $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vmul.w(<4 x i32> %va, <4 x i32> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vmul.d(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vmul_d(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vmul_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmul.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vmul.d(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mulw.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mulw.ll new file mode 100644 index 0000000000000000000000000000000000000000..eb55c1f809e3aa4fdf9b561fa806ccea21d07402 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-mulw.ll @@ -0,0 +1,290 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <8 x i16> @llvm.loongarch.lsx.vmulwev.h.b(<16 x i8>, <16 x i8>) + +define <8 x i16> @lsx_vmulwev_h_b(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vmulwev_h_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmulwev.h.b $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.b(<16 x i8> %va, <16 x i8> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vmulwev.w.h(<8 x i16>, <8 x i16>) + +define <4 x i32> @lsx_vmulwev_w_h(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vmulwev_w_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmulwev.w.h $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.h(<8 x i16> %va, <8 x i16> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vmulwev.d.w(<4 x i32>, <4 x i32>) + +define <2 x i64> @lsx_vmulwev_d_w(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vmulwev_d_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmulwev.d.w $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.w(<4 x i32> %va, <4 x i32> %vb) + ret <2 x i64> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vmulwev.q.d(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vmulwev_q_d(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vmulwev_q_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmulwev.q.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> 
@llvm.loongarch.lsx.vmulwev.q.d(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu(<16 x i8>, <16 x i8>) + +define <8 x i16> @lsx_vmulwev_h_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vmulwev_h_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmulwev.h.bu $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu(<16 x i8> %va, <16 x i8> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu(<8 x i16>, <8 x i16>) + +define <4 x i32> @lsx_vmulwev_w_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vmulwev_w_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmulwev.w.hu $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu(<8 x i16> %va, <8 x i16> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu(<4 x i32>, <4 x i32>) + +define <2 x i64> @lsx_vmulwev_d_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vmulwev_d_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmulwev.d.wu $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu(<4 x i32> %va, <4 x i32> %vb) + ret <2 x i64> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vmulwev_q_du(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vmulwev_q_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmulwev.q.du $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu.b(<16 x i8>, <16 x i8>) + +define <8 x i16> @lsx_vmulwev_h_bu_b(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vmulwev_h_bu_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmulwev.h.bu.b $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu.b(<16 x i8> %va, <16 x i8> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu.h(<8 x i16>, <8 x i16>) + +define <4 x i32> @lsx_vmulwev_w_hu_h(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vmulwev_w_hu_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmulwev.w.hu.h $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu.h(<8 x i16> %va, <8 x i16> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu.w(<4 x i32>, <4 x i32>) + +define <2 x i64> @lsx_vmulwev_d_wu_w(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vmulwev_d_wu_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmulwev.d.wu.w $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu.w(<4 x i32> %va, <4 x i32> %vb) + ret <2 x i64> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du.d(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vmulwev_q_du_d(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vmulwev_q_du_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmulwev.q.du.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du.d(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vmulwod.h.b(<16 x i8>, <16 x i8>) + +define <8 x i16> @lsx_vmulwod_h_b(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vmulwod_h_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: 
vmulwod.h.b $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.b(<16 x i8> %va, <16 x i8> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vmulwod.w.h(<8 x i16>, <8 x i16>) + +define <4 x i32> @lsx_vmulwod_w_h(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vmulwod_w_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmulwod.w.h $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.h(<8 x i16> %va, <8 x i16> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vmulwod.d.w(<4 x i32>, <4 x i32>) + +define <2 x i64> @lsx_vmulwod_d_w(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vmulwod_d_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmulwod.d.w $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.w(<4 x i32> %va, <4 x i32> %vb) + ret <2 x i64> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vmulwod.q.d(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vmulwod_q_d(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vmulwod_q_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmulwod.q.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.d(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu(<16 x i8>, <16 x i8>) + +define <8 x i16> @lsx_vmulwod_h_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vmulwod_h_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmulwod.h.bu $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu(<16 x i8> %va, <16 x i8> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu(<8 x i16>, <8 x i16>) + +define <4 x i32> @lsx_vmulwod_w_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vmulwod_w_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmulwod.w.hu $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu(<8 x i16> %va, <8 x i16> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu(<4 x i32>, <4 x i32>) + +define <2 x i64> @lsx_vmulwod_d_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vmulwod_d_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmulwod.d.wu $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu(<4 x i32> %va, <4 x i32> %vb) + ret <2 x i64> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vmulwod_q_du(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vmulwod_q_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmulwod.q.du $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu.b(<16 x i8>, <16 x i8>) + +define <8 x i16> @lsx_vmulwod_h_bu_b(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vmulwod_h_bu_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmulwod.h.bu.b $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu.b(<16 x i8> %va, <16 x i8> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu.h(<8 x i16>, <8 x i16>) + +define <4 x i32> @lsx_vmulwod_w_hu_h(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: 
lsx_vmulwod_w_hu_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmulwod.w.hu.h $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu.h(<8 x i16> %va, <8 x i16> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu.w(<4 x i32>, <4 x i32>) + +define <2 x i64> @lsx_vmulwod_d_wu_w(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vmulwod_d_wu_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmulwod.d.wu.w $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu.w(<4 x i32> %va, <4 x i32> %vb) + ret <2 x i64> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du.d(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vmulwod_q_du_d(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vmulwod_q_du_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmulwod.q.du.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du.d(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-neg.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-neg.ll new file mode 100644 index 0000000000000000000000000000000000000000..43c6e9757614903d1400054e8be858445ff924f3 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-neg.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vneg.b(<16 x i8>) + +define <16 x i8> @lsx_vneg_b(<16 x i8> %va) nounwind { +; CHECK-LABEL: lsx_vneg_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vneg.b $vr0, $vr0 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vneg.b(<16 x i8> %va) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vneg.h(<8 x i16>) + +define <8 x i16> @lsx_vneg_h(<8 x i16> %va) nounwind { +; CHECK-LABEL: lsx_vneg_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vneg.h $vr0, $vr0 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vneg.h(<8 x i16> %va) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vneg.w(<4 x i32>) + +define <4 x i32> @lsx_vneg_w(<4 x i32> %va) nounwind { +; CHECK-LABEL: lsx_vneg_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vneg.w $vr0, $vr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vneg.w(<4 x i32> %va) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vneg.d(<2 x i64>) + +define <2 x i64> @lsx_vneg_d(<2 x i64> %va) nounwind { +; CHECK-LABEL: lsx_vneg_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vneg.d $vr0, $vr0 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vneg.d(<2 x i64> %va) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-nor.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-nor.ll new file mode 100644 index 0000000000000000000000000000000000000000..16619225f2d178c9716313b91f4ecb78c89c1e27 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-nor.ll @@ -0,0 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vnor.v(<16 x i8>, <16 x i8>) + +define <16 x i8> @lsx_vnor_v(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vnor_v: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vnor.v $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> 
@llvm.loongarch.lsx.vnor.v(<16 x i8> %va, <16 x i8> %vb) + ret <16 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-nori-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-nori-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..8c59d8fb9fa5e79783d51485dc7ac17755902c20 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-nori-invalid-imm.ll @@ -0,0 +1,17 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vnori.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vnori_b_lo(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vnori.b: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vnori.b(<16 x i8> %va, i32 -1) + ret <16 x i8> %res +} + +define <16 x i8> @lsx_vnori_b_hi(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vnori.b: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vnori.b(<16 x i8> %va, i32 256) + ret <16 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-nori-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-nori-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..322a39c106a68623667a8f52e5966a4503990843 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-nori-non-imm.ll @@ -0,0 +1,10 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vnori.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vnori_b(<16 x i8> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vnori.b(<16 x i8> %va, i32 %b) + ret <16 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-nori.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-nori.ll new file mode 100644 index 0000000000000000000000000000000000000000..c2388a1e0da377cdf84c73ee4f251a6d32e3de00 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-nori.ll @@ -0,0 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vnori.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vnori_b(<16 x i8> %va) nounwind { +; CHECK-LABEL: lsx_vnori_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vnori.b $vr0, $vr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vnori.b(<16 x i8> %va, i32 1) + ret <16 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-or.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-or.ll new file mode 100644 index 0000000000000000000000000000000000000000..ab557003d1504aaf05a745c61b28ba50ed68b366 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-or.ll @@ -0,0 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vor.v(<16 x i8>, <16 x i8>) + +define <16 x i8> @lsx_vor_v(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vor_v: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vor.v $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vor.v(<16 x i8> %va, <16 x i8> %vb) + ret <16 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ori-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ori-invalid-imm.ll new file mode 100644 index 
0000000000000000000000000000000000000000..4a7fc7e109d96be6e34be486c8098f689704ec44 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ori-invalid-imm.ll @@ -0,0 +1,17 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vori.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vori_b_lo(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vori.b: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vori.b(<16 x i8> %va, i32 -1) + ret <16 x i8> %res +} + +define <16 x i8> @lsx_vori_b_hi(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vori.b: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vori.b(<16 x i8> %va, i32 256) + ret <16 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ori-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ori-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..5644b8581dce72227f71f7073812619269214b03 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ori-non-imm.ll @@ -0,0 +1,10 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vori.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vori_b(<16 x i8> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vori.b(<16 x i8> %va, i32 %b) + ret <16 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ori.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ori.ll new file mode 100644 index 0000000000000000000000000000000000000000..85c0f432c54a2e9eedcb8d0a1111935bc92a7d49 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ori.ll @@ -0,0 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vori.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vori_b(<16 x i8> %va) nounwind { +; CHECK-LABEL: lsx_vori_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vori.b $vr0, $vr0, 3 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vori.b(<16 x i8> %va, i32 3) + ret <16 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-orn.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-orn.ll new file mode 100644 index 0000000000000000000000000000000000000000..4528628e02c3c881ad334031675ec12394a7f7ce --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-orn.ll @@ -0,0 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vorn.v(<16 x i8>, <16 x i8>) + +define <16 x i8> @lsx_vorn_v(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vorn_v: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vorn.v $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vorn.v(<16 x i8> %va, <16 x i8> %vb) + ret <16 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pack.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pack.ll new file mode 100644 index 0000000000000000000000000000000000000000..70a3620d1757acd2bc1d6273cddc8e5e2e9cb476 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pack.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + 
+declare <16 x i8> @llvm.loongarch.lsx.vpackev.b(<16 x i8>, <16 x i8>) + +define <16 x i8> @lsx_vpackev_b(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vpackev_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vpackev.b $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vpackev.b(<16 x i8> %va, <16 x i8> %vb) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vpackev.h(<8 x i16>, <8 x i16>) + +define <8 x i16> @lsx_vpackev_h(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vpackev_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vpackev.h $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vpackev.h(<8 x i16> %va, <8 x i16> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vpackev.w(<4 x i32>, <4 x i32>) + +define <4 x i32> @lsx_vpackev_w(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vpackev_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vpackev.w $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vpackev.w(<4 x i32> %va, <4 x i32> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vpackev.d(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vpackev_d(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vpackev_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vpackev.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vpackev.d(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} + +declare <16 x i8> @llvm.loongarch.lsx.vpackod.b(<16 x i8>, <16 x i8>) + +define <16 x i8> @lsx_vpackod_b(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vpackod_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vpackod.b $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vpackod.b(<16 x i8> %va, <16 x i8> %vb) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vpackod.h(<8 x i16>, <8 x i16>) + +define <8 x i16> @lsx_vpackod_h(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vpackod_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vpackod.h $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vpackod.h(<8 x i16> %va, <8 x i16> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vpackod.w(<4 x i32>, <4 x i32>) + +define <4 x i32> @lsx_vpackod_w(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vpackod_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vpackod.w $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vpackod.w(<4 x i32> %va, <4 x i32> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vpackod.d(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vpackod_d(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vpackod_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vpackod.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vpackod.d(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pcnt.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pcnt.ll new file mode 100644 index 0000000000000000000000000000000000000000..431b270ab0a14f57650ac7b8d496688ee40bfd2e --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pcnt.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x 
i8> @llvm.loongarch.lsx.vpcnt.b(<16 x i8>) + +define <16 x i8> @lsx_vpcnt_b(<16 x i8> %va) nounwind { +; CHECK-LABEL: lsx_vpcnt_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vpcnt.b $vr0, $vr0 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vpcnt.b(<16 x i8> %va) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vpcnt.h(<8 x i16>) + +define <8 x i16> @lsx_vpcnt_h(<8 x i16> %va) nounwind { +; CHECK-LABEL: lsx_vpcnt_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vpcnt.h $vr0, $vr0 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vpcnt.h(<8 x i16> %va) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vpcnt.w(<4 x i32>) + +define <4 x i32> @lsx_vpcnt_w(<4 x i32> %va) nounwind { +; CHECK-LABEL: lsx_vpcnt_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vpcnt.w $vr0, $vr0 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vpcnt.w(<4 x i32> %va) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vpcnt.d(<2 x i64>) + +define <2 x i64> @lsx_vpcnt_d(<2 x i64> %va) nounwind { +; CHECK-LABEL: lsx_vpcnt_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vpcnt.d $vr0, $vr0 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vpcnt.d(<2 x i64> %va) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-permi-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-permi-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..e439bbae6130db7ce962ca5475e57e0ede1f8a63 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-permi-invalid-imm.ll @@ -0,0 +1,17 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <4 x i32> @llvm.loongarch.lsx.vpermi.w(<4 x i32>, <4 x i32>, i32) + +define <4 x i32> @lsx_vpermi_w_lo(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vpermi.w: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vpermi.w(<4 x i32> %va, <4 x i32> %vb, i32 -1) + ret <4 x i32> %res +} + +define <4 x i32> @lsx_vpermi_w_hi(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vpermi.w: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vpermi.w(<4 x i32> %va, <4 x i32> %vb, i32 256) + ret <4 x i32> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-permi-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-permi-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..bdfc08ed680a9d1de8b2ff4573d039bb2f289eb0 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-permi-non-imm.ll @@ -0,0 +1,10 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <4 x i32> @llvm.loongarch.lsx.vpermi.w(<4 x i32>, <4 x i32>, i32) + +define <4 x i32> @lsx_vpermi_w(<4 x i32> %va, <4 x i32> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vpermi.w(<4 x i32> %va, <4 x i32> %vb, i32 %c) + ret <4 x i32> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-permi.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-permi.ll new file mode 100644 index 0000000000000000000000000000000000000000..b8367d98caf660c5ede172befaa4353510fcfa06 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-permi.ll @@ -0,0 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <4 x 
i32> @llvm.loongarch.lsx.vpermi.w(<4 x i32>, <4 x i32>, i32) + +define <4 x i32> @lsx_vpermi_w(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vpermi_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vpermi.w $vr0, $vr1, 255 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vpermi.w(<4 x i32> %va, <4 x i32> %vb, i32 255) + ret <4 x i32> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pick.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pick.ll new file mode 100644 index 0000000000000000000000000000000000000000..4ebf29e1409c08f19c7705c9dc4e1ebe8cef8f43 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pick.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vpickev.b(<16 x i8>, <16 x i8>) + +define <16 x i8> @lsx_vpickev_b(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vpickev_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vpickev.b $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vpickev.b(<16 x i8> %va, <16 x i8> %vb) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vpickev.h(<8 x i16>, <8 x i16>) + +define <8 x i16> @lsx_vpickev_h(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vpickev_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vpickev.h $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vpickev.h(<8 x i16> %va, <8 x i16> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vpickev.w(<4 x i32>, <4 x i32>) + +define <4 x i32> @lsx_vpickev_w(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vpickev_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vpickev.w $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vpickev.w(<4 x i32> %va, <4 x i32> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vpickev.d(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vpickev_d(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vpickev_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vpickev.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vpickev.d(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} + +declare <16 x i8> @llvm.loongarch.lsx.vpickod.b(<16 x i8>, <16 x i8>) + +define <16 x i8> @lsx_vpickod_b(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vpickod_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vpickod.b $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vpickod.b(<16 x i8> %va, <16 x i8> %vb) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vpickod.h(<8 x i16>, <8 x i16>) + +define <8 x i16> @lsx_vpickod_h(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vpickod_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vpickod.h $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vpickod.h(<8 x i16> %va, <8 x i16> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vpickod.w(<4 x i32>, <4 x i32>) + +define <4 x i32> @lsx_vpickod_w(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vpickod_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vpickod.w $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vpickod.w(<4 x i32> %va, <4 x i32> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> 
@llvm.loongarch.lsx.vpickod.d(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vpickod_d(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vpickod_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vpickod.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vpickod.d(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pickve2gr-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pickve2gr-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..3430c54d21941b7fe89da90b6c3a8ff57d88aa22 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pickve2gr-invalid-imm.ll @@ -0,0 +1,129 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare i32 @llvm.loongarch.lsx.vpickve2gr.b(<16 x i8>, i32) + +define i32 @lsx_vpickve2gr_b_lo(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vpickve2gr.b: argument out of range +entry: + %res = call i32 @llvm.loongarch.lsx.vpickve2gr.b(<16 x i8> %va, i32 -1) + ret i32 %res +} + +define i32 @lsx_vpickve2gr_b_hi(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vpickve2gr.b: argument out of range +entry: + %res = call i32 @llvm.loongarch.lsx.vpickve2gr.b(<16 x i8> %va, i32 16) + ret i32 %res +} + +declare i32 @llvm.loongarch.lsx.vpickve2gr.h(<8 x i16>, i32) + +define i32 @lsx_vpickve2gr_h_lo(<8 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vpickve2gr.h: argument out of range +entry: + %res = call i32 @llvm.loongarch.lsx.vpickve2gr.h(<8 x i16> %va, i32 -1) + ret i32 %res +} + +define i32 @lsx_vpickve2gr_h_hi(<8 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vpickve2gr.h: argument out of range +entry: + %res = call i32 @llvm.loongarch.lsx.vpickve2gr.h(<8 x i16> %va, i32 8) + ret i32 %res +} + +declare i32 @llvm.loongarch.lsx.vpickve2gr.w(<4 x i32>, i32) + +define i32 @lsx_vpickve2gr_w_lo(<4 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vpickve2gr.w: argument out of range +entry: + %res = call i32 @llvm.loongarch.lsx.vpickve2gr.w(<4 x i32> %va, i32 -1) + ret i32 %res +} + +define i32 @lsx_vpickve2gr_w_hi(<4 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vpickve2gr.w: argument out of range +entry: + %res = call i32 @llvm.loongarch.lsx.vpickve2gr.w(<4 x i32> %va, i32 4) + ret i32 %res +} + +declare i64 @llvm.loongarch.lsx.vpickve2gr.d(<2 x i64>, i32) + +define i64 @lsx_vpickve2gr_d_lo(<2 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vpickve2gr.d: argument out of range +entry: + %res = call i64 @llvm.loongarch.lsx.vpickve2gr.d(<2 x i64> %va, i32 -1) + ret i64 %res +} + +define i64 @lsx_vpickve2gr_d_hi(<2 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vpickve2gr.d: argument out of range +entry: + %res = call i64 @llvm.loongarch.lsx.vpickve2gr.d(<2 x i64> %va, i32 2) + ret i64 %res +} + +declare i32 @llvm.loongarch.lsx.vpickve2gr.bu(<16 x i8>, i32) + +define i32 @lsx_vpickve2gr_bu_lo(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vpickve2gr.bu: argument out of range +entry: + %res = call i32 @llvm.loongarch.lsx.vpickve2gr.bu(<16 x i8> %va, i32 -1) + ret i32 %res +} + +define i32 @lsx_vpickve2gr_bu_hi(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vpickve2gr.bu: argument out of range +entry: + %res = call i32 @llvm.loongarch.lsx.vpickve2gr.bu(<16 x i8> %va, i32 16) + ret i32 %res +} + +declare i32 @llvm.loongarch.lsx.vpickve2gr.hu(<8 x i16>, i32) + +define i32 @lsx_vpickve2gr_hu_lo(<8 x i16> %va) nounwind { +; CHECK: 
llvm.loongarch.lsx.vpickve2gr.hu: argument out of range +entry: + %res = call i32 @llvm.loongarch.lsx.vpickve2gr.hu(<8 x i16> %va, i32 -1) + ret i32 %res +} + +define i32 @lsx_vpickve2gr_hu_hi(<8 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vpickve2gr.hu: argument out of range +entry: + %res = call i32 @llvm.loongarch.lsx.vpickve2gr.hu(<8 x i16> %va, i32 8) + ret i32 %res +} + +declare i32 @llvm.loongarch.lsx.vpickve2gr.wu(<4 x i32>, i32) + +define i32 @lsx_vpickve2gr_wu_lo(<4 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vpickve2gr.wu: argument out of range +entry: + %res = call i32 @llvm.loongarch.lsx.vpickve2gr.wu(<4 x i32> %va, i32 -1) + ret i32 %res +} + +define i32 @lsx_vpickve2gr_wu_hi(<4 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vpickve2gr.wu: argument out of range +entry: + %res = call i32 @llvm.loongarch.lsx.vpickve2gr.wu(<4 x i32> %va, i32 4) + ret i32 %res +} + +declare i64 @llvm.loongarch.lsx.vpickve2gr.du(<2 x i64>, i32) + +define i64 @lsx_vpickve2gr_du_lo(<2 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vpickve2gr.du: argument out of range +entry: + %res = call i64 @llvm.loongarch.lsx.vpickve2gr.du(<2 x i64> %va, i32 -1) + ret i64 %res +} + +define i64 @lsx_vpickve2gr_du_hi(<2 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vpickve2gr.du: argument out of range +entry: + %res = call i64 @llvm.loongarch.lsx.vpickve2gr.du(<2 x i64> %va, i32 2) + ret i64 %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pickve2gr-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pickve2gr-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..6dd3c1f27a81efa7933036d8a743c57b44f32e96 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pickve2gr-non-imm.ll @@ -0,0 +1,73 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare i32 @llvm.loongarch.lsx.vpickve2gr.b(<16 x i8>, i32) + +define i32 @lsx_vpickve2gr_b(<16 x i8> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call i32 @llvm.loongarch.lsx.vpickve2gr.b(<16 x i8> %va, i32 %b) + ret i32 %res +} + +declare i32 @llvm.loongarch.lsx.vpickve2gr.h(<8 x i16>, i32) + +define i32 @lsx_vpickve2gr_h(<8 x i16> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call i32 @llvm.loongarch.lsx.vpickve2gr.h(<8 x i16> %va, i32 %b) + ret i32 %res +} + +declare i32 @llvm.loongarch.lsx.vpickve2gr.w(<4 x i32>, i32) + +define i32 @lsx_vpickve2gr_w(<4 x i32> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call i32 @llvm.loongarch.lsx.vpickve2gr.w(<4 x i32> %va, i32 %b) + ret i32 %res +} + +declare i64 @llvm.loongarch.lsx.vpickve2gr.d(<2 x i64>, i32) + +define i64 @lsx_vpickve2gr_d(<2 x i64> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call i64 @llvm.loongarch.lsx.vpickve2gr.d(<2 x i64> %va, i32 %b) + ret i64 %res +} + +declare i32 @llvm.loongarch.lsx.vpickve2gr.bu(<16 x i8>, i32) + +define i32 @lsx_vpickve2gr_bu(<16 x i8> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call i32 @llvm.loongarch.lsx.vpickve2gr.bu(<16 x i8> %va, i32 %b) + ret i32 %res +} + +declare i32 @llvm.loongarch.lsx.vpickve2gr.hu(<8 x i16>, i32) + +define i32 @lsx_vpickve2gr_hu(<8 x i16> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call i32 @llvm.loongarch.lsx.vpickve2gr.hu(<8 x i16> %va, i32 %b) + ret i32 
%res +} + +declare i32 @llvm.loongarch.lsx.vpickve2gr.wu(<4 x i32>, i32) + +define i32 @lsx_vpickve2gr_wu(<4 x i32> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call i32 @llvm.loongarch.lsx.vpickve2gr.wu(<4 x i32> %va, i32 %b) + ret i32 %res +} + +declare i64 @llvm.loongarch.lsx.vpickve2gr.du(<2 x i64>, i32) + +define i64 @lsx_vpickve2gr_du(<2 x i64> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call i64 @llvm.loongarch.lsx.vpickve2gr.du(<2 x i64> %va, i32 %b) + ret i64 %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pickve2gr.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pickve2gr.ll new file mode 100644 index 0000000000000000000000000000000000000000..ed56d30ce3c46ab9dde842d89b4ae3761f0fa87a --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-pickve2gr.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare i32 @llvm.loongarch.lsx.vpickve2gr.b(<16 x i8>, i32) + +define i32 @lsx_vpickve2gr_b(<16 x i8> %va) nounwind { +; CHECK-LABEL: lsx_vpickve2gr_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 15 +; CHECK-NEXT: ret +entry: + %res = call i32 @llvm.loongarch.lsx.vpickve2gr.b(<16 x i8> %va, i32 15) + ret i32 %res +} + +declare i32 @llvm.loongarch.lsx.vpickve2gr.h(<8 x i16>, i32) + +define i32 @lsx_vpickve2gr_h(<8 x i16> %va) nounwind { +; CHECK-LABEL: lsx_vpickve2gr_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 7 +; CHECK-NEXT: ret +entry: + %res = call i32 @llvm.loongarch.lsx.vpickve2gr.h(<8 x i16> %va, i32 7) + ret i32 %res +} + +declare i32 @llvm.loongarch.lsx.vpickve2gr.w(<4 x i32>, i32) + +define i32 @lsx_vpickve2gr_w(<4 x i32> %va) nounwind { +; CHECK-LABEL: lsx_vpickve2gr_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 3 +; CHECK-NEXT: ret +entry: + %res = call i32 @llvm.loongarch.lsx.vpickve2gr.w(<4 x i32> %va, i32 3) + ret i32 %res +} + +declare i64 @llvm.loongarch.lsx.vpickve2gr.d(<2 x i64>, i32) + +define i64 @lsx_vpickve2gr_d(<2 x i64> %va) nounwind { +; CHECK-LABEL: lsx_vpickve2gr_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 1 +; CHECK-NEXT: ret +entry: + %res = call i64 @llvm.loongarch.lsx.vpickve2gr.d(<2 x i64> %va, i32 1) + ret i64 %res +} + +declare i32 @llvm.loongarch.lsx.vpickve2gr.bu(<16 x i8>, i32) + +define i32 @lsx_vpickve2gr_bu(<16 x i8> %va) nounwind { +; CHECK-LABEL: lsx_vpickve2gr_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vpickve2gr.bu $a0, $vr0, 15 +; CHECK-NEXT: ret +entry: + %res = call i32 @llvm.loongarch.lsx.vpickve2gr.bu(<16 x i8> %va, i32 15) + ret i32 %res +} + +declare i32 @llvm.loongarch.lsx.vpickve2gr.hu(<8 x i16>, i32) + +define i32 @lsx_vpickve2gr_hu(<8 x i16> %va) nounwind { +; CHECK-LABEL: lsx_vpickve2gr_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 7 +; CHECK-NEXT: ret +entry: + %res = call i32 @llvm.loongarch.lsx.vpickve2gr.hu(<8 x i16> %va, i32 7) + ret i32 %res +} + +declare i32 @llvm.loongarch.lsx.vpickve2gr.wu(<4 x i32>, i32) + +define i32 @lsx_vpickve2gr_wu(<4 x i32> %va) nounwind { +; CHECK-LABEL: lsx_vpickve2gr_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vpickve2gr.wu $a0, $vr0, 3 +; CHECK-NEXT: ret +entry: + %res = call i32 @llvm.loongarch.lsx.vpickve2gr.wu(<4 x i32> %va, i32 3) + ret i32 %res +} + +declare i64 @llvm.loongarch.lsx.vpickve2gr.du(<2 x i64>, i32) + +define i64 
@lsx_vpickve2gr_du(<2 x i64> %va) nounwind { +; CHECK-LABEL: lsx_vpickve2gr_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vpickve2gr.du $a0, $vr0, 1 +; CHECK-NEXT: ret +entry: + %res = call i64 @llvm.loongarch.lsx.vpickve2gr.du(<2 x i64> %va, i32 1) + ret i64 %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replgr2vr.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replgr2vr.ll new file mode 100644 index 0000000000000000000000000000000000000000..091f1c98c2289a1f2492f70c629ecc124feb98f9 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replgr2vr.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vreplgr2vr.b(i32) + +define <16 x i8> @lsx_vreplgr2vr_b(i32 %a) nounwind { +; CHECK-LABEL: lsx_vreplgr2vr_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vreplgr2vr.b $vr0, $a0 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vreplgr2vr.b(i32 %a) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vreplgr2vr.h(i32) + +define <8 x i16> @lsx_vreplgr2vr_h(i32 %a) nounwind { +; CHECK-LABEL: lsx_vreplgr2vr_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vreplgr2vr.h $vr0, $a0 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vreplgr2vr.h(i32 %a) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vreplgr2vr.w(i32) + +define <4 x i32> @lsx_vreplgr2vr_w(i32 %a) nounwind { +; CHECK-LABEL: lsx_vreplgr2vr_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vreplgr2vr.w $vr0, $a0 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vreplgr2vr.w(i32 %a) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vreplgr2vr.d(i64) + +define <2 x i64> @lsx_vreplgr2vr_d(i64 %a) nounwind { +; CHECK-LABEL: lsx_vreplgr2vr_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vreplgr2vr.d $vr0, $a0 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vreplgr2vr.d(i64 %a) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replve.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replve.ll new file mode 100644 index 0000000000000000000000000000000000000000..3ba184dad052b925a6becbe215c2b02fa37103c7 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replve.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vreplve.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vreplve_b(<16 x i8> %va, i32 %b) nounwind { +; CHECK-LABEL: lsx_vreplve_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vreplve.b $vr0, $vr0, $a0 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vreplve.b(<16 x i8> %va, i32 %b) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vreplve.h(<8 x i16>, i32) + +define <8 x i16> @lsx_vreplve_h(<8 x i16> %va, i32 %b) nounwind { +; CHECK-LABEL: lsx_vreplve_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vreplve.h $vr0, $vr0, $a0 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vreplve.h(<8 x i16> %va, i32 %b) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vreplve.w(<4 x i32>, i32) + +define <4 x i32> @lsx_vreplve_w(<4 x i32> %va, i32 %b) nounwind { +; CHECK-LABEL: lsx_vreplve_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vreplve.w $vr0, $vr0, $a0 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> 
@llvm.loongarch.lsx.vreplve.w(<4 x i32> %va, i32 %b) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vreplve.d(<2 x i64>, i32) + +define <2 x i64> @lsx_vreplve_d(<2 x i64> %va, i32 %b) nounwind { +; CHECK-LABEL: lsx_vreplve_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vreplve.d $vr0, $vr0, $a0 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vreplve.d(<2 x i64> %va, i32 %b) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replvei-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replvei-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..d625441122a6679487d26bbd77ea45fe8c532cdd --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replvei-invalid-imm.ll @@ -0,0 +1,65 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vreplvei.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vreplvei_b_lo(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vreplvei.b: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vreplvei.b(<16 x i8> %va, i32 -1) + ret <16 x i8> %res +} + +define <16 x i8> @lsx_vreplvei_b_hi(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vreplvei.b: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vreplvei.b(<16 x i8> %va, i32 16) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vreplvei.h(<8 x i16>, i32) + +define <8 x i16> @lsx_vreplvei_h_lo(<8 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vreplvei.h: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vreplvei.h(<8 x i16> %va, i32 -1) + ret <8 x i16> %res +} + +define <8 x i16> @lsx_vreplvei_h_hi(<8 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vreplvei.h: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vreplvei.h(<8 x i16> %va, i32 8) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vreplvei.w(<4 x i32>, i32) + +define <4 x i32> @lsx_vreplvei_w_lo(<4 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vreplvei.w: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vreplvei.w(<4 x i32> %va, i32 -1) + ret <4 x i32> %res +} + +define <4 x i32> @lsx_vreplvei_w_hi(<4 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vreplvei.w: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vreplvei.w(<4 x i32> %va, i32 4) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vreplvei.d(<2 x i64>, i32) + +define <2 x i64> @lsx_vreplvei_d_lo(<2 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vreplvei.d: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vreplvei.d(<2 x i64> %va, i32 -1) + ret <2 x i64> %res +} + +define <2 x i64> @lsx_vreplvei_d_hi(<2 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vreplvei.d: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vreplvei.d(<2 x i64> %va, i32 2) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replvei-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replvei-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..3d271bb2b3073d8cff7467131f6a655b4362860c --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replvei-non-imm.ll @@ -0,0 +1,37 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vreplvei.b(<16 x i8>, i32) + +define <16 x 
i8> @lsx_vreplvei_b(<16 x i8> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vreplvei.b(<16 x i8> %va, i32 %b) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vreplvei.h(<8 x i16>, i32) + +define <8 x i16> @lsx_vreplvei_h(<8 x i16> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vreplvei.h(<8 x i16> %va, i32 %b) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vreplvei.w(<4 x i32>, i32) + +define <4 x i32> @lsx_vreplvei_w(<4 x i32> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vreplvei.w(<4 x i32> %va, i32 %b) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vreplvei.d(<2 x i64>, i32) + +define <2 x i64> @lsx_vreplvei_d(<2 x i64> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vreplvei.d(<2 x i64> %va, i32 %b) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replvei.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replvei.ll new file mode 100644 index 0000000000000000000000000000000000000000..9b8af1878cb83d52308bc77865bcea4fcc4c1167 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-replvei.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vreplvei.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vreplvei_b(<16 x i8> %va) nounwind { +; CHECK-LABEL: lsx_vreplvei_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vreplvei.b $vr0, $vr0, 15 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vreplvei.b(<16 x i8> %va, i32 15) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vreplvei.h(<8 x i16>, i32) + +define <8 x i16> @lsx_vreplvei_h(<8 x i16> %va) nounwind { +; CHECK-LABEL: lsx_vreplvei_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vreplvei.h $vr0, $vr0, 7 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vreplvei.h(<8 x i16> %va, i32 7) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vreplvei.w(<4 x i32>, i32) + +define <4 x i32> @lsx_vreplvei_w(<4 x i32> %va) nounwind { +; CHECK-LABEL: lsx_vreplvei_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vreplvei.w $vr0, $vr0, 3 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vreplvei.w(<4 x i32> %va, i32 3) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vreplvei.d(<2 x i64>, i32) + +define <2 x i64> @lsx_vreplvei_d(<2 x i64> %va) nounwind { +; CHECK-LABEL: lsx_vreplvei_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vreplvei.d $vr0, $vr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vreplvei.d(<2 x i64> %va, i32 1) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-rotr-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-rotr-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..3c53b36672ad32e36ba0b47e3d9edf91c8283c65 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-rotr-invalid-imm.ll @@ -0,0 +1,65 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vrotri.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vrotri_b_lo(<16 x 
i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vrotri.b: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vrotri.b(<16 x i8> %va, i32 -1) + ret <16 x i8> %res +} + +define <16 x i8> @lsx_vrotri_b_hi(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vrotri.b: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vrotri.b(<16 x i8> %va, i32 8) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vrotri.h(<8 x i16>, i32) + +define <8 x i16> @lsx_vrotri_h_lo(<8 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vrotri.h: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vrotri.h(<8 x i16> %va, i32 -1) + ret <8 x i16> %res +} + +define <8 x i16> @lsx_vrotri_h_hi(<8 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vrotri.h: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vrotri.h(<8 x i16> %va, i32 16) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vrotri.w(<4 x i32>, i32) + +define <4 x i32> @lsx_vrotri_w_lo(<4 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vrotri.w: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vrotri.w(<4 x i32> %va, i32 -1) + ret <4 x i32> %res +} + +define <4 x i32> @lsx_vrotri_w_hi(<4 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vrotri.w: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vrotri.w(<4 x i32> %va, i32 32) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vrotri.d(<2 x i64>, i32) + +define <2 x i64> @lsx_vrotri_d_lo(<2 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vrotri.d: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vrotri.d(<2 x i64> %va, i32 -1) + ret <2 x i64> %res +} + +define <2 x i64> @lsx_vrotri_d_hi(<2 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vrotri.d: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vrotri.d(<2 x i64> %va, i32 64) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-rotr-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-rotr-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..fd8ba3a1c633f5b0b2cdb4cc07145d1fb74b5f42 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-rotr-non-imm.ll @@ -0,0 +1,37 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vrotri.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vrotri_b(<16 x i8> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vrotri.b(<16 x i8> %va, i32 %b) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vrotri.h(<8 x i16>, i32) + +define <8 x i16> @lsx_vrotri_h(<8 x i16> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vrotri.h(<8 x i16> %va, i32 %b) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vrotri.w(<4 x i32>, i32) + +define <4 x i32> @lsx_vrotri_w(<4 x i32> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vrotri.w(<4 x i32> %va, i32 %b) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vrotri.d(<2 x i64>, i32) + +define <2 x i64> @lsx_vrotri_d(<2 x i64> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <2 x i64> 
@llvm.loongarch.lsx.vrotri.d(<2 x i64> %va, i32 %b) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-rotr.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-rotr.ll new file mode 100644 index 0000000000000000000000000000000000000000..df8650677147b6aac7095bcd7cacdea0abe0ad77 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-rotr.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vrotr.b(<16 x i8>, <16 x i8>) + +define <16 x i8> @lsx_vrotr_b(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vrotr_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vrotr.b $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vrotr.b(<16 x i8> %va, <16 x i8> %vb) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vrotr.h(<8 x i16>, <8 x i16>) + +define <8 x i16> @lsx_vrotr_h(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vrotr_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vrotr.h $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vrotr.h(<8 x i16> %va, <8 x i16> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vrotr.w(<4 x i32>, <4 x i32>) + +define <4 x i32> @lsx_vrotr_w(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vrotr_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vrotr.w $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vrotr.w(<4 x i32> %va, <4 x i32> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vrotr.d(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vrotr_d(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vrotr_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vrotr.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vrotr.d(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} + +declare <16 x i8> @llvm.loongarch.lsx.vrotri.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vrotri_b(<16 x i8> %va) nounwind { +; CHECK-LABEL: lsx_vrotri_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vrotri.b $vr0, $vr0, 7 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vrotri.b(<16 x i8> %va, i32 7) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vrotri.h(<8 x i16>, i32) + +define <8 x i16> @lsx_vrotri_h(<8 x i16> %va) nounwind { +; CHECK-LABEL: lsx_vrotri_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vrotri.h $vr0, $vr0, 15 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vrotri.h(<8 x i16> %va, i32 15) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vrotri.w(<4 x i32>, i32) + +define <4 x i32> @lsx_vrotri_w(<4 x i32> %va) nounwind { +; CHECK-LABEL: lsx_vrotri_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vrotri.w $vr0, $vr0, 31 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vrotri.w(<4 x i32> %va, i32 31) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vrotri.d(<2 x i64>, i32) + +define <2 x i64> @lsx_vrotri_d(<2 x i64> %va) nounwind { +; CHECK-LABEL: lsx_vrotri_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vrotri.d $vr0, $vr0, 63 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vrotri.d(<2 x i64> %va, i32 63) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sadd.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sadd.ll new 
file mode 100644 index 0000000000000000000000000000000000000000..a54f955766dfe6441b46b86f24bc4bbfcb69301a --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sadd.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vsadd.b(<16 x i8>, <16 x i8>) + +define <16 x i8> @lsx_vsadd_b(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vsadd_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsadd.b $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsadd.b(<16 x i8> %va, <16 x i8> %vb) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vsadd.h(<8 x i16>, <8 x i16>) + +define <8 x i16> @lsx_vsadd_h(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vsadd_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsadd.h $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsadd.h(<8 x i16> %va, <8 x i16> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vsadd.w(<4 x i32>, <4 x i32>) + +define <4 x i32> @lsx_vsadd_w(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vsadd_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsadd.w $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsadd.w(<4 x i32> %va, <4 x i32> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vsadd.d(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vsadd_d(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vsadd_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsadd.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsadd.d(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} + +declare <16 x i8> @llvm.loongarch.lsx.vsadd.bu(<16 x i8>, <16 x i8>) + +define <16 x i8> @lsx_vsadd_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vsadd_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsadd.bu $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsadd.bu(<16 x i8> %va, <16 x i8> %vb) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vsadd.hu(<8 x i16>, <8 x i16>) + +define <8 x i16> @lsx_vsadd_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vsadd_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsadd.hu $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsadd.hu(<8 x i16> %va, <8 x i16> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vsadd.wu(<4 x i32>, <4 x i32>) + +define <4 x i32> @lsx_vsadd_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vsadd_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsadd.wu $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsadd.wu(<4 x i32> %va, <4 x i32> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vsadd.du(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vsadd_du(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vsadd_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsadd.du $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsadd.du(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sat-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sat-invalid-imm.ll new file mode 100644 index 
0000000000000000000000000000000000000000..45fa4e43be198d38c6a5f471bb99df14061498f1 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sat-invalid-imm.ll @@ -0,0 +1,129 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vsat.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vsat_b_lo(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsat.b: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsat.b(<16 x i8> %va, i32 -1) + ret <16 x i8> %res +} + +define <16 x i8> @lsx_vsat_b_hi(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsat.b: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsat.b(<16 x i8> %va, i32 8) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vsat.h(<8 x i16>, i32) + +define <8 x i16> @lsx_vsat_h_lo(<8 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsat.h: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsat.h(<8 x i16> %va, i32 -1) + ret <8 x i16> %res +} + +define <8 x i16> @lsx_vsat_h_hi(<8 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsat.h: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsat.h(<8 x i16> %va, i32 16) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vsat.w(<4 x i32>, i32) + +define <4 x i32> @lsx_vsat_w_lo(<4 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsat.w: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsat.w(<4 x i32> %va, i32 -1) + ret <4 x i32> %res +} + +define <4 x i32> @lsx_vsat_w_hi(<4 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsat.w: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsat.w(<4 x i32> %va, i32 32) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vsat.d(<2 x i64>, i32) + +define <2 x i64> @lsx_vsat_d_lo(<2 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsat.d: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsat.d(<2 x i64> %va, i32 -1) + ret <2 x i64> %res +} + +define <2 x i64> @lsx_vsat_d_hi(<2 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsat.d: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsat.d(<2 x i64> %va, i32 64) + ret <2 x i64> %res +} + +declare <16 x i8> @llvm.loongarch.lsx.vsat.bu(<16 x i8>, i32) + +define <16 x i8> @lsx_vsat_bu_lo(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsat.bu: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsat.bu(<16 x i8> %va, i32 -1) + ret <16 x i8> %res +} + +define <16 x i8> @lsx_vsat_bu_hi(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsat.bu: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsat.bu(<16 x i8> %va, i32 8) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vsat.hu(<8 x i16>, i32) + +define <8 x i16> @lsx_vsat_hu_lo(<8 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsat.hu: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsat.hu(<8 x i16> %va, i32 -1) + ret <8 x i16> %res +} + +define <8 x i16> @lsx_vsat_hu_hi(<8 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsat.hu: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsat.hu(<8 x i16> %va, i32 16) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vsat.wu(<4 x i32>, i32) + +define <4 x i32> @lsx_vsat_wu_lo(<4 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsat.wu: argument out of range 
+entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsat.wu(<4 x i32> %va, i32 -1) + ret <4 x i32> %res +} + +define <4 x i32> @lsx_vsat_wu_hi(<4 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsat.wu: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsat.wu(<4 x i32> %va, i32 32) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vsat.du(<2 x i64>, i32) + +define <2 x i64> @lsx_vsat_du_lo(<2 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsat.du: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsat.du(<2 x i64> %va, i32 -1) + ret <2 x i64> %res +} + +define <2 x i64> @lsx_vsat_du_hi(<2 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsat.du: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsat.du(<2 x i64> %va, i32 64) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sat-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sat-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..afdbe0c1ce0b991df290a8f59d445c627dfd1f63 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sat-non-imm.ll @@ -0,0 +1,73 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vsat.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vsat_b(<16 x i8> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsat.b(<16 x i8> %va, i32 %b) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vsat.h(<8 x i16>, i32) + +define <8 x i16> @lsx_vsat_h(<8 x i16> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsat.h(<8 x i16> %va, i32 %b) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vsat.w(<4 x i32>, i32) + +define <4 x i32> @lsx_vsat_w(<4 x i32> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsat.w(<4 x i32> %va, i32 %b) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vsat.d(<2 x i64>, i32) + +define <2 x i64> @lsx_vsat_d(<2 x i64> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsat.d(<2 x i64> %va, i32 %b) + ret <2 x i64> %res +} + +declare <16 x i8> @llvm.loongarch.lsx.vsat.bu(<16 x i8>, i32) + +define <16 x i8> @lsx_vsat_bu(<16 x i8> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsat.bu(<16 x i8> %va, i32 %b) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vsat.hu(<8 x i16>, i32) + +define <8 x i16> @lsx_vsat_hu(<8 x i16> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsat.hu(<8 x i16> %va, i32 %b) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vsat.wu(<4 x i32>, i32) + +define <4 x i32> @lsx_vsat_wu(<4 x i32> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsat.wu(<4 x i32> %va, i32 %b) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vsat.du(<2 x i64>, i32) + +define <2 x i64> @lsx_vsat_du(<2 x i64> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsat.du(<2 x i64> 
%va, i32 %b) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sat.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sat.ll new file mode 100644 index 0000000000000000000000000000000000000000..4286842a63b98e2a9cb879fe07a434c28b67140d --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sat.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vsat.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vsat_b(<16 x i8> %va) nounwind { +; CHECK-LABEL: lsx_vsat_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsat.b $vr0, $vr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsat.b(<16 x i8> %va, i32 1) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vsat.h(<8 x i16>, i32) + +define <8 x i16> @lsx_vsat_h(<8 x i16> %va) nounwind { +; CHECK-LABEL: lsx_vsat_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsat.h $vr0, $vr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsat.h(<8 x i16> %va, i32 1) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vsat.w(<4 x i32>, i32) + +define <4 x i32> @lsx_vsat_w(<4 x i32> %va) nounwind { +; CHECK-LABEL: lsx_vsat_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsat.w $vr0, $vr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsat.w(<4 x i32> %va, i32 1) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vsat.d(<2 x i64>, i32) + +define <2 x i64> @lsx_vsat_d(<2 x i64> %va) nounwind { +; CHECK-LABEL: lsx_vsat_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsat.d $vr0, $vr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsat.d(<2 x i64> %va, i32 1) + ret <2 x i64> %res +} + +declare <16 x i8> @llvm.loongarch.lsx.vsat.bu(<16 x i8>, i32) + +define <16 x i8> @lsx_vsat_bu(<16 x i8> %va) nounwind { +; CHECK-LABEL: lsx_vsat_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsat.bu $vr0, $vr0, 7 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsat.bu(<16 x i8> %va, i32 7) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vsat.hu(<8 x i16>, i32) + +define <8 x i16> @lsx_vsat_hu(<8 x i16> %va) nounwind { +; CHECK-LABEL: lsx_vsat_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsat.hu $vr0, $vr0, 15 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsat.hu(<8 x i16> %va, i32 15) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vsat.wu(<4 x i32>, i32) + +define <4 x i32> @lsx_vsat_wu(<4 x i32> %va) nounwind { +; CHECK-LABEL: lsx_vsat_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsat.wu $vr0, $vr0, 31 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsat.wu(<4 x i32> %va, i32 31) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vsat.du(<2 x i64>, i32) + +define <2 x i64> @lsx_vsat_du(<2 x i64> %va) nounwind { +; CHECK-LABEL: lsx_vsat_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsat.du $vr0, $vr0, 63 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsat.du(<2 x i64> %va, i32 63) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-seq-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-seq-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..220398ff28cda67db836c34ed350270a68ccd27c --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-seq-invalid-imm.ll @@ 
-0,0 +1,65 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vseqi.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vseqi_b_lo(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vseqi.b: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vseqi.b(<16 x i8> %va, i32 -17) + ret <16 x i8> %res +} + +define <16 x i8> @lsx_vseqi_b_hi(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vseqi.b: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vseqi.b(<16 x i8> %va, i32 16) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vseqi.h(<8 x i16>, i32) + +define <8 x i16> @lsx_vseqi_h_lo(<8 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vseqi.h: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vseqi.h(<8 x i16> %va, i32 -17) + ret <8 x i16> %res +} + +define <8 x i16> @lsx_vseqi_h_hi(<8 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vseqi.h: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vseqi.h(<8 x i16> %va, i32 16) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vseqi.w(<4 x i32>, i32) + +define <4 x i32> @lsx_vseqi_w_lo(<4 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vseqi.w: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vseqi.w(<4 x i32> %va, i32 -17) + ret <4 x i32> %res +} + +define <4 x i32> @lsx_vseqi_w_hi(<4 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vseqi.w: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vseqi.w(<4 x i32> %va, i32 16) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vseqi.d(<2 x i64>, i32) + +define <2 x i64> @lsx_vseqi_d_lo(<2 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vseqi.d: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vseqi.d(<2 x i64> %va, i32 -17) + ret <2 x i64> %res +} + +define <2 x i64> @lsx_vseqi_d_hi(<2 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vseqi.d: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vseqi.d(<2 x i64> %va, i32 16) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-seq-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-seq-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..5fa1dd30475ce3679646025f74c86b00f64765e3 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-seq-non-imm.ll @@ -0,0 +1,37 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vseqi.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vseqi_b(<16 x i8> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vseqi.b(<16 x i8> %va, i32 %b) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vseqi.h(<8 x i16>, i32) + +define <8 x i16> @lsx_vseqi_h(<8 x i16> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vseqi.h(<8 x i16> %va, i32 %b) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vseqi.w(<4 x i32>, i32) + +define <4 x i32> @lsx_vseqi_w(<4 x i32> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vseqi.w(<4 x i32> %va, i32 %b) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vseqi.d(<2 x i64>, i32) + +define <2 x 
i64> @lsx_vseqi_d(<2 x i64> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vseqi.d(<2 x i64> %va, i32 %b) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-seq.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-seq.ll new file mode 100644 index 0000000000000000000000000000000000000000..3cb4acd824393be9e7e8c919ade83e47e043c1d1 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-seq.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vseq.b(<16 x i8>, <16 x i8>) + +define <16 x i8> @lsx_vseq_b(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vseq_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vseq.b $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vseq.b(<16 x i8> %va, <16 x i8> %vb) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vseq.h(<8 x i16>, <8 x i16>) + +define <8 x i16> @lsx_vseq_h(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vseq_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vseq.h $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vseq.h(<8 x i16> %va, <8 x i16> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vseq.w(<4 x i32>, <4 x i32>) + +define <4 x i32> @lsx_vseq_w(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vseq_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vseq.w $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vseq.w(<4 x i32> %va, <4 x i32> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vseq.d(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vseq_d(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vseq_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vseq.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vseq.d(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} + +declare <16 x i8> @llvm.loongarch.lsx.vseqi.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vseqi_b(<16 x i8> %va) nounwind { +; CHECK-LABEL: lsx_vseqi_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vseqi.b $vr0, $vr0, 15 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vseqi.b(<16 x i8> %va, i32 15) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vseqi.h(<8 x i16>, i32) + +define <8 x i16> @lsx_vseqi_h(<8 x i16> %va) nounwind { +; CHECK-LABEL: lsx_vseqi_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vseqi.h $vr0, $vr0, 15 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vseqi.h(<8 x i16> %va, i32 15) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vseqi.w(<4 x i32>, i32) + +define <4 x i32> @lsx_vseqi_w(<4 x i32> %va) nounwind { +; CHECK-LABEL: lsx_vseqi_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vseqi.w $vr0, $vr0, -16 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vseqi.w(<4 x i32> %va, i32 -16) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vseqi.d(<2 x i64>, i32) + +define <2 x i64> @lsx_vseqi_d(<2 x i64> %va) nounwind { +; CHECK-LABEL: lsx_vseqi_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vseqi.d $vr0, $vr0, -16 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vseqi.d(<2 x i64> %va, i32 -16) + ret <2 x i64> %res +} diff --git 
a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-set.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-set.ll new file mode 100644 index 0000000000000000000000000000000000000000..3188fb4e2c2ef7e3522ffcc1ac6b6a9ec0b539bc --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-set.ll @@ -0,0 +1,38 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare i32 @llvm.loongarch.lsx.bz.v(<16 x i8>) + +define i32 @lsx_bz_v(<16 x i8> %va) nounwind { +; CHECK-LABEL: lsx_bz_v: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vseteqz.v $fcc0, $vr0 +; CHECK-NEXT: bcnez $fcc0, .LBB0_2 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: addi.w $a0, $zero, 0 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB0_2: # %entry +; CHECK-NEXT: addi.w $a0, $zero, 1 +; CHECK-NEXT: ret +entry: + %res = call i32 @llvm.loongarch.lsx.bz.v(<16 x i8> %va) + ret i32 %res +} + +declare i32 @llvm.loongarch.lsx.bnz.v(<16 x i8>) + +define i32 @lsx_bnz_v(<16 x i8> %va) nounwind { +; CHECK-LABEL: lsx_bnz_v: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetnez.v $fcc0, $vr0 +; CHECK-NEXT: bcnez $fcc0, .LBB1_2 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: addi.w $a0, $zero, 0 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB1_2: # %entry +; CHECK-NEXT: addi.w $a0, $zero, 1 +; CHECK-NEXT: ret +entry: + %res = call i32 @llvm.loongarch.lsx.bnz.v(<16 x i8> %va) + ret i32 %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-setallnez.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-setallnez.ll new file mode 100644 index 0000000000000000000000000000000000000000..22e01922e87bb3b6b22af6a7601719e14e1d7bd2 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-setallnez.ll @@ -0,0 +1,74 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare i32 @llvm.loongarch.lsx.bnz.b(<16 x i8>) + +define i32 @lsx_bnz_b(<16 x i8> %va) nounwind { +; CHECK-LABEL: lsx_bnz_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetallnez.b $fcc0, $vr0 +; CHECK-NEXT: bcnez $fcc0, .LBB0_2 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: addi.w $a0, $zero, 0 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB0_2: # %entry +; CHECK-NEXT: addi.w $a0, $zero, 1 +; CHECK-NEXT: ret +entry: + %res = call i32 @llvm.loongarch.lsx.bnz.b(<16 x i8> %va) + ret i32 %res +} + +declare i32 @llvm.loongarch.lsx.bnz.h(<8 x i16>) + +define i32 @lsx_bnz_h(<8 x i16> %va) nounwind { +; CHECK-LABEL: lsx_bnz_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetallnez.h $fcc0, $vr0 +; CHECK-NEXT: bcnez $fcc0, .LBB1_2 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: addi.w $a0, $zero, 0 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB1_2: # %entry +; CHECK-NEXT: addi.w $a0, $zero, 1 +; CHECK-NEXT: ret +entry: + %res = call i32 @llvm.loongarch.lsx.bnz.h(<8 x i16> %va) + ret i32 %res +} + +declare i32 @llvm.loongarch.lsx.bnz.w(<4 x i32>) + +define i32 @lsx_bnz_w(<4 x i32> %va) nounwind { +; CHECK-LABEL: lsx_bnz_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetallnez.w $fcc0, $vr0 +; CHECK-NEXT: bcnez $fcc0, .LBB2_2 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: addi.w $a0, $zero, 0 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB2_2: # %entry +; CHECK-NEXT: addi.w $a0, $zero, 1 +; CHECK-NEXT: ret +entry: + %res = call i32 @llvm.loongarch.lsx.bnz.w(<4 x i32> %va) + ret i32 %res +} + +declare i32 @llvm.loongarch.lsx.bnz.d(<2 x i64>) + +define i32 @lsx_bnz_d(<2 x i64> %va) nounwind { +; CHECK-LABEL: lsx_bnz_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: 
vsetallnez.d $fcc0, $vr0 +; CHECK-NEXT: bcnez $fcc0, .LBB3_2 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: addi.w $a0, $zero, 0 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB3_2: # %entry +; CHECK-NEXT: addi.w $a0, $zero, 1 +; CHECK-NEXT: ret +entry: + %res = call i32 @llvm.loongarch.lsx.bnz.d(<2 x i64> %va) + ret i32 %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-setanyeqz.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-setanyeqz.ll new file mode 100644 index 0000000000000000000000000000000000000000..96c79c10e46889af9b13c36e01529c63b4966e77 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-setanyeqz.ll @@ -0,0 +1,74 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare i32 @llvm.loongarch.lsx.bz.b(<16 x i8>) + +define i32 @lsx_bz_b(<16 x i8> %va) nounwind { +; CHECK-LABEL: lsx_bz_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetanyeqz.b $fcc0, $vr0 +; CHECK-NEXT: bcnez $fcc0, .LBB0_2 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: addi.w $a0, $zero, 0 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB0_2: # %entry +; CHECK-NEXT: addi.w $a0, $zero, 1 +; CHECK-NEXT: ret +entry: + %res = call i32 @llvm.loongarch.lsx.bz.b(<16 x i8> %va) + ret i32 %res +} + +declare i32 @llvm.loongarch.lsx.bz.h(<8 x i16>) + +define i32 @lsx_bz_h(<8 x i16> %va) nounwind { +; CHECK-LABEL: lsx_bz_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetanyeqz.h $fcc0, $vr0 +; CHECK-NEXT: bcnez $fcc0, .LBB1_2 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: addi.w $a0, $zero, 0 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB1_2: # %entry +; CHECK-NEXT: addi.w $a0, $zero, 1 +; CHECK-NEXT: ret +entry: + %res = call i32 @llvm.loongarch.lsx.bz.h(<8 x i16> %va) + ret i32 %res +} + +declare i32 @llvm.loongarch.lsx.bz.w(<4 x i32>) + +define i32 @lsx_bz_w(<4 x i32> %va) nounwind { +; CHECK-LABEL: lsx_bz_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetanyeqz.w $fcc0, $vr0 +; CHECK-NEXT: bcnez $fcc0, .LBB2_2 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: addi.w $a0, $zero, 0 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB2_2: # %entry +; CHECK-NEXT: addi.w $a0, $zero, 1 +; CHECK-NEXT: ret +entry: + %res = call i32 @llvm.loongarch.lsx.bz.w(<4 x i32> %va) + ret i32 %res +} + +declare i32 @llvm.loongarch.lsx.bz.d(<2 x i64>) + +define i32 @lsx_bz_d(<2 x i64> %va) nounwind { +; CHECK-LABEL: lsx_bz_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetanyeqz.d $fcc0, $vr0 +; CHECK-NEXT: bcnez $fcc0, .LBB3_2 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: addi.w $a0, $zero, 0 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB3_2: # %entry +; CHECK-NEXT: addi.w $a0, $zero, 1 +; CHECK-NEXT: ret +entry: + %res = call i32 @llvm.loongarch.lsx.bz.d(<2 x i64> %va) + ret i32 %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf.ll new file mode 100644 index 0000000000000000000000000000000000000000..f5d516521e45f106d90900cbbedfd53823d9c2f7 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vshuf.b(<16 x i8>, <16 x i8>, <16 x i8>) + +define <16 x i8> @lsx_vshuf_b(<16 x i8> %va, <16 x i8> %vb, <16 x i8> %vc) nounwind { +; CHECK-LABEL: lsx_vshuf_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vshuf.b $vr0, $vr0, $vr1, $vr2 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> 
@llvm.loongarch.lsx.vshuf.b(<16 x i8> %va, <16 x i8> %vb, <16 x i8> %vc) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vshuf.h(<8 x i16>, <8 x i16>, <8 x i16>) + +define <8 x i16> @lsx_vshuf_h(<8 x i16> %va, <8 x i16> %vb, <8 x i16> %vc) nounwind { +; CHECK-LABEL: lsx_vshuf_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vshuf.h $vr0, $vr1, $vr2 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vshuf.h(<8 x i16> %va, <8 x i16> %vb, <8 x i16> %vc) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vshuf.w(<4 x i32>, <4 x i32>, <4 x i32>) + +define <4 x i32> @lsx_vshuf_w(<4 x i32> %va, <4 x i32> %vb, <4 x i32> %vc) nounwind { +; CHECK-LABEL: lsx_vshuf_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vshuf.w $vr0, $vr1, $vr2 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vshuf.w(<4 x i32> %va, <4 x i32> %vb, <4 x i32> %vc) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vshuf.d(<2 x i64>, <2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vshuf_d(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) nounwind { +; CHECK-LABEL: lsx_vshuf_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vshuf.d $vr0, $vr1, $vr2 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vshuf.d(<2 x i64> %va, <2 x i64> %vb, <2 x i64> %vc) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf4i-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf4i-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..4d6fadf08c26b17b1e9f6a0c69b4d0530accfccc --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf4i-invalid-imm.ll @@ -0,0 +1,65 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vshuf4i.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vshuf4i_b_lo(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vshuf4i.b: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vshuf4i.b(<16 x i8> %va, i32 -1) + ret <16 x i8> %res +} + +define <16 x i8> @lsx_vshuf4i_b_hi(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vshuf4i.b: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vshuf4i.b(<16 x i8> %va, i32 256) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vshuf4i.h(<8 x i16>, i32) + +define <8 x i16> @lsx_vshuf4i_h_lo(<8 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vshuf4i.h: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vshuf4i.h(<8 x i16> %va, i32 -1) + ret <8 x i16> %res +} + +define <8 x i16> @lsx_vshuf4i_h_hi(<8 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vshuf4i.h: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vshuf4i.h(<8 x i16> %va, i32 256) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vshuf4i.w(<4 x i32>, i32) + +define <4 x i32> @lsx_vshuf4i_w_lo(<4 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vshuf4i.w: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vshuf4i.w(<4 x i32> %va, i32 -1) + ret <4 x i32> %res +} + +define <4 x i32> @lsx_vshuf4i_w_hi(<4 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vshuf4i.w: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vshuf4i.w(<4 x i32> %va, i32 256) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vshuf4i.d(<2 x i64>, <2 x i64>, i32) + +define <2 x i64> @lsx_vshuf4i_d_lo(<2 x i64> %va, <2 x i64> %vb) nounwind { +; 
CHECK: llvm.loongarch.lsx.vshuf4i.d: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vshuf4i.d(<2 x i64> %va, <2 x i64> %vb, i32 -1) + ret <2 x i64> %res +} + +define <2 x i64> @lsx_vshuf4i_d_hi(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vshuf4i.d: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vshuf4i.d(<2 x i64> %va, <2 x i64> %vb, i32 256) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf4i-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf4i-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..a7d138bcc00bbfeb9a3bb320f4733ed5341ce618 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf4i-non-imm.ll @@ -0,0 +1,37 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vshuf4i.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vshuf4i_b(<16 x i8> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vshuf4i.b(<16 x i8> %va, i32 %b) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vshuf4i.h(<8 x i16>, i32) + +define <8 x i16> @lsx_vshuf4i_h(<8 x i16> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vshuf4i.h(<8 x i16> %va, i32 %b) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vshuf4i.w(<4 x i32>, i32) + +define <4 x i32> @lsx_vshuf4i_w(<4 x i32> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vshuf4i.w(<4 x i32> %va, i32 %b) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vshuf4i.d(<2 x i64>, <2 x i64>, i32) + +define <2 x i64> @lsx_vshuf4i_d(<2 x i64> %va, <2 x i64> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vshuf4i.d(<2 x i64> %va, <2 x i64> %vb, i32 %c) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf4i.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf4i.ll new file mode 100644 index 0000000000000000000000000000000000000000..1ad5f2af5591e5a050d28a420542f81e69d9190d --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-shuf4i.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vshuf4i.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vshuf4i_b(<16 x i8> %va) nounwind { +; CHECK-LABEL: lsx_vshuf4i_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vshuf4i.b $vr0, $vr0, 255 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vshuf4i.b(<16 x i8> %va, i32 255) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vshuf4i.h(<8 x i16>, i32) + +define <8 x i16> @lsx_vshuf4i_h(<8 x i16> %va) nounwind { +; CHECK-LABEL: lsx_vshuf4i_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vshuf4i.h $vr0, $vr0, 255 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vshuf4i.h(<8 x i16> %va, i32 255) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vshuf4i.w(<4 x i32>, i32) + +define <4 x i32> @lsx_vshuf4i_w(<4 x i32> %va) nounwind { +; CHECK-LABEL: lsx_vshuf4i_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vshuf4i.w $vr0, $vr0, 255 +; CHECK-NEXT: ret +entry: + %res = call <4 x 
i32> @llvm.loongarch.lsx.vshuf4i.w(<4 x i32> %va, i32 255) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vshuf4i.d(<2 x i64>, <2 x i64>, i32) + +define <2 x i64> @lsx_vshuf4i_d(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vshuf4i_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vshuf4i.d $vr0, $vr1, 255 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vshuf4i.d(<2 x i64> %va, <2 x i64> %vb, i32 255) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-signcov.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-signcov.ll new file mode 100644 index 0000000000000000000000000000000000000000..3997b0cc995c50cae2a0c248235a01b3e54dbb56 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-signcov.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vsigncov.b(<16 x i8>, <16 x i8>) + +define <16 x i8> @lsx_vsigncov_b(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vsigncov_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsigncov.b $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsigncov.b(<16 x i8> %va, <16 x i8> %vb) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vsigncov.h(<8 x i16>, <8 x i16>) + +define <8 x i16> @lsx_vsigncov_h(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vsigncov_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsigncov.h $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsigncov.h(<8 x i16> %va, <8 x i16> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vsigncov.w(<4 x i32>, <4 x i32>) + +define <4 x i32> @lsx_vsigncov_w(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vsigncov_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsigncov.w $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsigncov.w(<4 x i32> %va, <4 x i32> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vsigncov.d(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vsigncov_d(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vsigncov_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsigncov.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsigncov.d(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sle-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sle-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..4c945e296711f7e6f46da803794a9452f6f343a1 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sle-invalid-imm.ll @@ -0,0 +1,129 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vslei.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vslei_b_lo(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vslei.b: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vslei.b(<16 x i8> %va, i32 -17) + ret <16 x i8> %res +} + +define <16 x i8> @lsx_vslei_b_hi(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vslei.b: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vslei.b(<16 x i8> %va, i32 16) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vslei.h(<8 x i16>, i32) + +define <8 x i16> 
@lsx_vslei_h_lo(<8 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vslei.h: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vslei.h(<8 x i16> %va, i32 -17) + ret <8 x i16> %res +} + +define <8 x i16> @lsx_vslei_h_hi(<8 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vslei.h: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vslei.h(<8 x i16> %va, i32 16) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vslei.w(<4 x i32>, i32) + +define <4 x i32> @lsx_vslei_w_lo(<4 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vslei.w: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vslei.w(<4 x i32> %va, i32 -17) + ret <4 x i32> %res +} + +define <4 x i32> @lsx_vslei_w_hi(<4 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vslei.w: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vslei.w(<4 x i32> %va, i32 16) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vslei.d(<2 x i64>, i32) + +define <2 x i64> @lsx_vslei_d_lo(<2 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vslei.d: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vslei.d(<2 x i64> %va, i32 -17) + ret <2 x i64> %res +} + +define <2 x i64> @lsx_vslei_d_hi(<2 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vslei.d: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vslei.d(<2 x i64> %va, i32 16) + ret <2 x i64> %res +} + +declare <16 x i8> @llvm.loongarch.lsx.vslei.bu(<16 x i8>, i32) + +define <16 x i8> @lsx_vslei_bu_lo(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vslei.bu: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vslei.bu(<16 x i8> %va, i32 -1) + ret <16 x i8> %res +} + +define <16 x i8> @lsx_vslei_bu_hi(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vslei.bu: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vslei.bu(<16 x i8> %va, i32 32) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vslei.hu(<8 x i16>, i32) + +define <8 x i16> @lsx_vslei_hu_lo(<8 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vslei.hu: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vslei.hu(<8 x i16> %va, i32 -1) + ret <8 x i16> %res +} + +define <8 x i16> @lsx_vslei_hu_hi(<8 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vslei.hu: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vslei.hu(<8 x i16> %va, i32 32) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vslei.wu(<4 x i32>, i32) + +define <4 x i32> @lsx_vslei_wu_lo(<4 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vslei.wu: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vslei.wu(<4 x i32> %va, i32 -1) + ret <4 x i32> %res +} + +define <4 x i32> @lsx_vslei_wu_hi(<4 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vslei.wu: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vslei.wu(<4 x i32> %va, i32 32) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vslei.du(<2 x i64>, i32) + +define <2 x i64> @lsx_vslei_du_lo(<2 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vslei.du: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vslei.du(<2 x i64> %va, i32 -1) + ret <2 x i64> %res +} + +define <2 x i64> @lsx_vslei_du_hi(<2 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vslei.du: argument out of range +entry: + %res = call <2 x i64> 
@llvm.loongarch.lsx.vslei.du(<2 x i64> %va, i32 32) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sle-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sle-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..0fc137bf05498421a4df7fa30001a25c3159a91f --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sle-non-imm.ll @@ -0,0 +1,73 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vslei.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vslei_b(<16 x i8> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vslei.b(<16 x i8> %va, i32 %b) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vslei.h(<8 x i16>, i32) + +define <8 x i16> @lsx_vslei_h(<8 x i16> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vslei.h(<8 x i16> %va, i32 %b) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vslei.w(<4 x i32>, i32) + +define <4 x i32> @lsx_vslei_w(<4 x i32> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vslei.w(<4 x i32> %va, i32 %b) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vslei.d(<2 x i64>, i32) + +define <2 x i64> @lsx_vslei_d(<2 x i64> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vslei.d(<2 x i64> %va, i32 %b) + ret <2 x i64> %res +} + +declare <16 x i8> @llvm.loongarch.lsx.vslei.bu(<16 x i8>, i32) + +define <16 x i8> @lsx_vslei_bu(<16 x i8> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vslei.bu(<16 x i8> %va, i32 %b) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vslei.hu(<8 x i16>, i32) + +define <8 x i16> @lsx_vslei_hu(<8 x i16> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vslei.hu(<8 x i16> %va, i32 %b) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vslei.wu(<4 x i32>, i32) + +define <4 x i32> @lsx_vslei_wu(<4 x i32> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vslei.wu(<4 x i32> %va, i32 %b) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vslei.du(<2 x i64>, i32) + +define <2 x i64> @lsx_vslei_du(<2 x i64> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vslei.du(<2 x i64> %va, i32 %b) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sle.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sle.ll new file mode 100644 index 0000000000000000000000000000000000000000..5a9d5f06e63f89d5710e14509531bcafa857c1db --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sle.ll @@ -0,0 +1,194 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vsle.b(<16 x i8>, <16 x i8>) + +define <16 x i8> @lsx_vsle_b(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vsle_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsle.b $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: 
+ %res = call <16 x i8> @llvm.loongarch.lsx.vsle.b(<16 x i8> %va, <16 x i8> %vb) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vsle.h(<8 x i16>, <8 x i16>) + +define <8 x i16> @lsx_vsle_h(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vsle_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsle.h $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsle.h(<8 x i16> %va, <8 x i16> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vsle.w(<4 x i32>, <4 x i32>) + +define <4 x i32> @lsx_vsle_w(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vsle_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsle.w $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsle.w(<4 x i32> %va, <4 x i32> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vsle.d(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vsle_d(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vsle_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsle.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsle.d(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} + +declare <16 x i8> @llvm.loongarch.lsx.vslei.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vslei_b(<16 x i8> %va) nounwind { +; CHECK-LABEL: lsx_vslei_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vslei.b $vr0, $vr0, 15 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vslei.b(<16 x i8> %va, i32 15) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vslei.h(<8 x i16>, i32) + +define <8 x i16> @lsx_vslei_h(<8 x i16> %va) nounwind { +; CHECK-LABEL: lsx_vslei_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vslei.h $vr0, $vr0, 15 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vslei.h(<8 x i16> %va, i32 15) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vslei.w(<4 x i32>, i32) + +define <4 x i32> @lsx_vslei_w(<4 x i32> %va) nounwind { +; CHECK-LABEL: lsx_vslei_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vslei.w $vr0, $vr0, -16 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vslei.w(<4 x i32> %va, i32 -16) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vslei.d(<2 x i64>, i32) + +define <2 x i64> @lsx_vslei_d(<2 x i64> %va) nounwind { +; CHECK-LABEL: lsx_vslei_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vslei.d $vr0, $vr0, -16 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vslei.d(<2 x i64> %va, i32 -16) + ret <2 x i64> %res +} + +declare <16 x i8> @llvm.loongarch.lsx.vsle.bu(<16 x i8>, <16 x i8>) + +define <16 x i8> @lsx_vsle_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vsle_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsle.bu $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsle.bu(<16 x i8> %va, <16 x i8> %vb) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vsle.hu(<8 x i16>, <8 x i16>) + +define <8 x i16> @lsx_vsle_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vsle_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsle.hu $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsle.hu(<8 x i16> %va, <8 x i16> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vsle.wu(<4 x i32>, <4 x i32>) + +define <4 x i32> @lsx_vsle_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vsle_wu: +; CHECK: # %bb.0: # %entry +; 
CHECK-NEXT: vsle.wu $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsle.wu(<4 x i32> %va, <4 x i32> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vsle.du(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vsle_du(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vsle_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsle.du $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsle.du(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} + +declare <16 x i8> @llvm.loongarch.lsx.vslei.bu(<16 x i8>, i32) + +define <16 x i8> @lsx_vslei_bu(<16 x i8> %va) nounwind { +; CHECK-LABEL: lsx_vslei_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vslei.bu $vr0, $vr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vslei.bu(<16 x i8> %va, i32 1) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vslei.hu(<8 x i16>, i32) + +define <8 x i16> @lsx_vslei_hu(<8 x i16> %va) nounwind { +; CHECK-LABEL: lsx_vslei_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vslei.hu $vr0, $vr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vslei.hu(<8 x i16> %va, i32 1) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vslei.wu(<4 x i32>, i32) + +define <4 x i32> @lsx_vslei_wu(<4 x i32> %va) nounwind { +; CHECK-LABEL: lsx_vslei_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vslei.wu $vr0, $vr0, 31 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vslei.wu(<4 x i32> %va, i32 31) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vslei.du(<2 x i64>, i32) + +define <2 x i64> @lsx_vslei_du(<2 x i64> %va) nounwind { +; CHECK-LABEL: lsx_vslei_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vslei.du $vr0, $vr0, 31 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vslei.du(<2 x i64> %va, i32 31) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sll-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sll-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..75406f94887ca9db353dce9acd37616d058d0612 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sll-invalid-imm.ll @@ -0,0 +1,65 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vslli.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vslli_b_lo(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vslli.b: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vslli.b(<16 x i8> %va, i32 -1) + ret <16 x i8> %res +} + +define <16 x i8> @lsx_vslli_b_hi(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vslli.b: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vslli.b(<16 x i8> %va, i32 8) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vslli.h(<8 x i16>, i32) + +define <8 x i16> @lsx_vslli_h_lo(<8 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vslli.h: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vslli.h(<8 x i16> %va, i32 -1) + ret <8 x i16> %res +} + +define <8 x i16> @lsx_vslli_h_hi(<8 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vslli.h: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vslli.h(<8 x i16> %va, i32 16) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vslli.w(<4 x i32>, i32) + +define <4 x i32> @lsx_vslli_w_lo(<4 x i32> %va) nounwind { +; CHECK: 
llvm.loongarch.lsx.vslli.w: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vslli.w(<4 x i32> %va, i32 -1) + ret <4 x i32> %res +} + +define <4 x i32> @lsx_vslli_w_hi(<4 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vslli.w: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vslli.w(<4 x i32> %va, i32 32) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vslli.d(<2 x i64>, i32) + +define <2 x i64> @lsx_vslli_d_lo(<2 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vslli.d: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vslli.d(<2 x i64> %va, i32 -1) + ret <2 x i64> %res +} + +define <2 x i64> @lsx_vslli_d_hi(<2 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vslli.d: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vslli.d(<2 x i64> %va, i32 64) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sll-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sll-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..7474b5e2973498ed36e591549fff3e783f718c49 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sll-non-imm.ll @@ -0,0 +1,37 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vslli.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vslli_b(<16 x i8> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vslli.b(<16 x i8> %va, i32 %b) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vslli.h(<8 x i16>, i32) + +define <8 x i16> @lsx_vslli_h(<8 x i16> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vslli.h(<8 x i16> %va, i32 %b) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vslli.w(<4 x i32>, i32) + +define <4 x i32> @lsx_vslli_w(<4 x i32> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vslli.w(<4 x i32> %va, i32 %b) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vslli.d(<2 x i64>, i32) + +define <2 x i64> @lsx_vslli_d(<2 x i64> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vslli.d(<2 x i64> %va, i32 %b) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sll.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sll.ll new file mode 100644 index 0000000000000000000000000000000000000000..7bc20af41f17a865962af4cec51e08235afca161 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sll.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vsll.b(<16 x i8>, <16 x i8>) + +define <16 x i8> @lsx_vsll_b(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vsll_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsll.b $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsll.b(<16 x i8> %va, <16 x i8> %vb) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vsll.h(<8 x i16>, <8 x i16>) + +define <8 x i16> @lsx_vsll_h(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vsll_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsll.h $vr0, $vr0, $vr1 +; 
CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsll.h(<8 x i16> %va, <8 x i16> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vsll.w(<4 x i32>, <4 x i32>) + +define <4 x i32> @lsx_vsll_w(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vsll_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsll.w $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsll.w(<4 x i32> %va, <4 x i32> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vsll.d(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vsll_d(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vsll_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsll.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsll.d(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} + +declare <16 x i8> @llvm.loongarch.lsx.vslli.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vslli_b(<16 x i8> %va) nounwind { +; CHECK-LABEL: lsx_vslli_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vslli.b $vr0, $vr0, 7 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vslli.b(<16 x i8> %va, i32 7) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vslli.h(<8 x i16>, i32) + +define <8 x i16> @lsx_vslli_h(<8 x i16> %va) nounwind { +; CHECK-LABEL: lsx_vslli_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vslli.h $vr0, $vr0, 15 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vslli.h(<8 x i16> %va, i32 15) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vslli.w(<4 x i32>, i32) + +define <4 x i32> @lsx_vslli_w(<4 x i32> %va) nounwind { +; CHECK-LABEL: lsx_vslli_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vslli.w $vr0, $vr0, 31 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vslli.w(<4 x i32> %va, i32 31) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vslli.d(<2 x i64>, i32) + +define <2 x i64> @lsx_vslli_d(<2 x i64> %va) nounwind { +; CHECK-LABEL: lsx_vslli_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vslli.d $vr0, $vr0, 63 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vslli.d(<2 x i64> %va, i32 63) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sllwil-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sllwil-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..bda3523a0b5c045ffc93c316465a7b1c94ea9868 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sllwil-invalid-imm.ll @@ -0,0 +1,97 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <8 x i16> @llvm.loongarch.lsx.vsllwil.h.b(<16 x i8>, i32) + +define <8 x i16> @lsx_vsllwil_h_b_lo(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsllwil.h.b: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsllwil.h.b(<16 x i8> %va, i32 -1) + ret <8 x i16> %res +} + +define <8 x i16> @lsx_vsllwil_h_b_hi(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsllwil.h.b: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsllwil.h.b(<16 x i8> %va, i32 8) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vsllwil.w.h(<8 x i16>, i32) + +define <4 x i32> @lsx_vsllwil_w_h_lo(<8 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsllwil.w.h: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsllwil.w.h(<8 x i16> %va, i32 -1) + ret <4 x i32> %res +} + +define <4 x 
i32> @lsx_vsllwil_w_h_hi(<8 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsllwil.w.h: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsllwil.w.h(<8 x i16> %va, i32 16) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vsllwil.d.w(<4 x i32>, i32) + +define <2 x i64> @lsx_vsllwil_d_w_lo(<4 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsllwil.d.w: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsllwil.d.w(<4 x i32> %va, i32 -1) + ret <2 x i64> %res +} + +define <2 x i64> @lsx_vsllwil_d_w_hi(<4 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsllwil.d.w: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsllwil.d.w(<4 x i32> %va, i32 32) + ret <2 x i64> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vsllwil.hu.bu(<16 x i8>, i32) + +define <8 x i16> @lsx_vsllwil_hu_bu_lo(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsllwil.hu.bu: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsllwil.hu.bu(<16 x i8> %va, i32 -1) + ret <8 x i16> %res +} + +define <8 x i16> @lsx_vsllwil_hu_bu_hi(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsllwil.hu.bu: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsllwil.hu.bu(<16 x i8> %va, i32 8) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vsllwil.wu.hu(<8 x i16>, i32) + +define <4 x i32> @lsx_vsllwil_wu_hu_lo(<8 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsllwil.wu.hu: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsllwil.wu.hu(<8 x i16> %va, i32 -1) + ret <4 x i32> %res +} + +define <4 x i32> @lsx_vsllwil_wu_hu_hi(<8 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsllwil.wu.hu: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsllwil.wu.hu(<8 x i16> %va, i32 16) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vsllwil.du.wu(<4 x i32>, i32) + +define <2 x i64> @lsx_vsllwil_du_wu_lo(<4 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsllwil.du.wu: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsllwil.du.wu(<4 x i32> %va, i32 -1) + ret <2 x i64> %res +} + +define <2 x i64> @lsx_vsllwil_du_wu_hi(<4 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsllwil.du.wu: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsllwil.du.wu(<4 x i32> %va, i32 32) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sllwil-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sllwil-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..a03656d5ca07a03f2188578953c65067781f3540 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sllwil-non-imm.ll @@ -0,0 +1,55 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <8 x i16> @llvm.loongarch.lsx.vsllwil.h.b(<16 x i8>, i32) + +define <8 x i16> @lsx_vsllwil_h_b(<16 x i8> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsllwil.h.b(<16 x i8> %va, i32 %b) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vsllwil.w.h(<8 x i16>, i32) + +define <4 x i32> @lsx_vsllwil_w_h(<8 x i16> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsllwil.w.h(<8 x i16> %va, i32 %b) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vsllwil.d.w(<4 x 
i32>, i32) + +define <2 x i64> @lsx_vsllwil_d_w(<4 x i32> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsllwil.d.w(<4 x i32> %va, i32 %b) + ret <2 x i64> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vsllwil.hu.bu(<16 x i8>, i32) + +define <8 x i16> @lsx_vsllwil_hu_bu(<16 x i8> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsllwil.hu.bu(<16 x i8> %va, i32 %b) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vsllwil.wu.hu(<8 x i16>, i32) + +define <4 x i32> @lsx_vsllwil_wu_hu(<8 x i16> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsllwil.wu.hu(<8 x i16> %va, i32 %b) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vsllwil.du.wu(<4 x i32>, i32) + +define <2 x i64> @lsx_vsllwil_du_wu(<4 x i32> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsllwil.du.wu(<4 x i32> %va, i32 %b) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sllwil.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sllwil.ll new file mode 100644 index 0000000000000000000000000000000000000000..29ab70da1ceda32e8d45e0bc529062708cdc943f --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sllwil.ll @@ -0,0 +1,74 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <8 x i16> @llvm.loongarch.lsx.vsllwil.h.b(<16 x i8>, i32) + +define <8 x i16> @lsx_vsllwil_h_b(<16 x i8> %va) nounwind { +; CHECK-LABEL: lsx_vsllwil_h_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsllwil.h.b $vr0, $vr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsllwil.h.b(<16 x i8> %va, i32 1) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vsllwil.w.h(<8 x i16>, i32) + +define <4 x i32> @lsx_vsllwil_w_h(<8 x i16> %va) nounwind { +; CHECK-LABEL: lsx_vsllwil_w_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsllwil.w.h $vr0, $vr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsllwil.w.h(<8 x i16> %va, i32 1) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vsllwil.d.w(<4 x i32>, i32) + +define <2 x i64> @lsx_vsllwil_d_w(<4 x i32> %va) nounwind { +; CHECK-LABEL: lsx_vsllwil_d_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsllwil.d.w $vr0, $vr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsllwil.d.w(<4 x i32> %va, i32 1) + ret <2 x i64> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vsllwil.hu.bu(<16 x i8>, i32) + +define <8 x i16> @lsx_vsllwil_hu_bu(<16 x i8> %va) nounwind { +; CHECK-LABEL: lsx_vsllwil_hu_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsllwil.hu.bu $vr0, $vr0, 7 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsllwil.hu.bu(<16 x i8> %va, i32 7) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vsllwil.wu.hu(<8 x i16>, i32) + +define <4 x i32> @lsx_vsllwil_wu_hu(<8 x i16> %va) nounwind { +; CHECK-LABEL: lsx_vsllwil_wu_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsllwil.wu.hu $vr0, $vr0, 15 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsllwil.wu.hu(<8 x i16> %va, i32 15) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vsllwil.du.wu(<4 x i32>, i32) + 
+define <2 x i64> @lsx_vsllwil_du_wu(<4 x i32> %va) nounwind { +; CHECK-LABEL: lsx_vsllwil_du_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsllwil.du.wu $vr0, $vr0, 31 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsllwil.du.wu(<4 x i32> %va, i32 31) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-slt-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-slt-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..f6d014b19d6c7d7315e393c53efeb8b06db88288 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-slt-invalid-imm.ll @@ -0,0 +1,129 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vslti.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vslti_b_lo(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vslti.b: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vslti.b(<16 x i8> %va, i32 -17) + ret <16 x i8> %res +} + +define <16 x i8> @lsx_vslti_b_hi(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vslti.b: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vslti.b(<16 x i8> %va, i32 16) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vslti.h(<8 x i16>, i32) + +define <8 x i16> @lsx_vslti_h_lo(<8 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vslti.h: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vslti.h(<8 x i16> %va, i32 -17) + ret <8 x i16> %res +} + +define <8 x i16> @lsx_vslti_h_hi(<8 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vslti.h: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vslti.h(<8 x i16> %va, i32 16) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vslti.w(<4 x i32>, i32) + +define <4 x i32> @lsx_vslti_w_lo(<4 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vslti.w: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vslti.w(<4 x i32> %va, i32 -17) + ret <4 x i32> %res +} + +define <4 x i32> @lsx_vslti_w_hi(<4 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vslti.w: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vslti.w(<4 x i32> %va, i32 16) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vslti.d(<2 x i64>, i32) + +define <2 x i64> @lsx_vslti_d_lo(<2 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vslti.d: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vslti.d(<2 x i64> %va, i32 -17) + ret <2 x i64> %res +} + +define <2 x i64> @lsx_vslti_d_hi(<2 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vslti.d: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vslti.d(<2 x i64> %va, i32 16) + ret <2 x i64> %res +} + +declare <16 x i8> @llvm.loongarch.lsx.vslti.bu(<16 x i8>, i32) + +define <16 x i8> @lsx_vslti_bu_lo(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vslti.bu: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vslti.bu(<16 x i8> %va, i32 -1) + ret <16 x i8> %res +} + +define <16 x i8> @lsx_vslti_bu_hi(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vslti.bu: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vslti.bu(<16 x i8> %va, i32 32) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vslti.hu(<8 x i16>, i32) + +define <8 x i16> @lsx_vslti_hu_lo(<8 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vslti.hu: argument out of range +entry: + %res 
= call <8 x i16> @llvm.loongarch.lsx.vslti.hu(<8 x i16> %va, i32 -1) + ret <8 x i16> %res +} + +define <8 x i16> @lsx_vslti_hu_hi(<8 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vslti.hu: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vslti.hu(<8 x i16> %va, i32 32) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vslti.wu(<4 x i32>, i32) + +define <4 x i32> @lsx_vslti_wu_lo(<4 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vslti.wu: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vslti.wu(<4 x i32> %va, i32 -1) + ret <4 x i32> %res +} + +define <4 x i32> @lsx_vslti_wu_hi(<4 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vslti.wu: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vslti.wu(<4 x i32> %va, i32 32) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vslti.du(<2 x i64>, i32) + +define <2 x i64> @lsx_vslti_du_lo(<2 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vslti.du: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vslti.du(<2 x i64> %va, i32 -1) + ret <2 x i64> %res +} + +define <2 x i64> @lsx_vslti_du_hi(<2 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vslti.du: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vslti.du(<2 x i64> %va, i32 32) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-slt-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-slt-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..9a8b757dab4e4a36baeefb08af9ccb6b5e3b39e8 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-slt-non-imm.ll @@ -0,0 +1,73 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vslti.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vslti_b(<16 x i8> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vslti.b(<16 x i8> %va, i32 %b) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vslti.h(<8 x i16>, i32) + +define <8 x i16> @lsx_vslti_h(<8 x i16> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vslti.h(<8 x i16> %va, i32 %b) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vslti.w(<4 x i32>, i32) + +define <4 x i32> @lsx_vslti_w(<4 x i32> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vslti.w(<4 x i32> %va, i32 %b) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vslti.d(<2 x i64>, i32) + +define <2 x i64> @lsx_vslti_d(<2 x i64> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vslti.d(<2 x i64> %va, i32 %b) + ret <2 x i64> %res +} + +declare <16 x i8> @llvm.loongarch.lsx.vslti.bu(<16 x i8>, i32) + +define <16 x i8> @lsx_vslti_bu(<16 x i8> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vslti.bu(<16 x i8> %va, i32 %b) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vslti.hu(<8 x i16>, i32) + +define <8 x i16> @lsx_vslti_hu(<8 x i16> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vslti.hu(<8 x i16> %va, i32 %b) + ret <8 x i16> %res +} + 
+declare <4 x i32> @llvm.loongarch.lsx.vslti.wu(<4 x i32>, i32) + +define <4 x i32> @lsx_vslti_wu(<4 x i32> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vslti.wu(<4 x i32> %va, i32 %b) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vslti.du(<2 x i64>, i32) + +define <2 x i64> @lsx_vslti_du(<2 x i64> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vslti.du(<2 x i64> %va, i32 %b) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-slt.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-slt.ll new file mode 100644 index 0000000000000000000000000000000000000000..18683e9dc46f6354166d82a05219be43e867f7ad --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-slt.ll @@ -0,0 +1,194 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vslt.b(<16 x i8>, <16 x i8>) + +define <16 x i8> @lsx_vslt_b(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vslt_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vslt.b $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vslt.b(<16 x i8> %va, <16 x i8> %vb) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vslt.h(<8 x i16>, <8 x i16>) + +define <8 x i16> @lsx_vslt_h(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vslt_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vslt.h $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vslt.h(<8 x i16> %va, <8 x i16> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vslt.w(<4 x i32>, <4 x i32>) + +define <4 x i32> @lsx_vslt_w(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vslt_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vslt.w $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vslt.w(<4 x i32> %va, <4 x i32> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vslt.d(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vslt_d(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vslt_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vslt.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vslt.d(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} + +declare <16 x i8> @llvm.loongarch.lsx.vslti.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vslti_b(<16 x i8> %va) nounwind { +; CHECK-LABEL: lsx_vslti_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vslti.b $vr0, $vr0, 15 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vslti.b(<16 x i8> %va, i32 15) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vslti.h(<8 x i16>, i32) + +define <8 x i16> @lsx_vslti_h(<8 x i16> %va) nounwind { +; CHECK-LABEL: lsx_vslti_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vslti.h $vr0, $vr0, 15 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vslti.h(<8 x i16> %va, i32 15) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vslti.w(<4 x i32>, i32) + +define <4 x i32> @lsx_vslti_w(<4 x i32> %va) nounwind { +; CHECK-LABEL: lsx_vslti_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vslti.w $vr0, $vr0, -16 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vslti.w(<4 x i32> %va, i32 -16) + ret <4 x i32> 
%res +} + +declare <2 x i64> @llvm.loongarch.lsx.vslti.d(<2 x i64>, i32) + +define <2 x i64> @lsx_vslti_d(<2 x i64> %va) nounwind { +; CHECK-LABEL: lsx_vslti_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vslti.d $vr0, $vr0, -16 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vslti.d(<2 x i64> %va, i32 -16) + ret <2 x i64> %res +} + +declare <16 x i8> @llvm.loongarch.lsx.vslt.bu(<16 x i8>, <16 x i8>) + +define <16 x i8> @lsx_vslt_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vslt_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vslt.bu $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vslt.bu(<16 x i8> %va, <16 x i8> %vb) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vslt.hu(<8 x i16>, <8 x i16>) + +define <8 x i16> @lsx_vslt_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vslt_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vslt.hu $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vslt.hu(<8 x i16> %va, <8 x i16> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vslt.wu(<4 x i32>, <4 x i32>) + +define <4 x i32> @lsx_vslt_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vslt_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vslt.wu $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vslt.wu(<4 x i32> %va, <4 x i32> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vslt.du(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vslt_du(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vslt_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vslt.du $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vslt.du(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} + +declare <16 x i8> @llvm.loongarch.lsx.vslti.bu(<16 x i8>, i32) + +define <16 x i8> @lsx_vslti_bu(<16 x i8> %va) nounwind { +; CHECK-LABEL: lsx_vslti_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vslti.bu $vr0, $vr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vslti.bu(<16 x i8> %va, i32 1) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vslti.hu(<8 x i16>, i32) + +define <8 x i16> @lsx_vslti_hu(<8 x i16> %va) nounwind { +; CHECK-LABEL: lsx_vslti_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vslti.hu $vr0, $vr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vslti.hu(<8 x i16> %va, i32 1) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vslti.wu(<4 x i32>, i32) + +define <4 x i32> @lsx_vslti_wu(<4 x i32> %va) nounwind { +; CHECK-LABEL: lsx_vslti_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vslti.wu $vr0, $vr0, 31 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vslti.wu(<4 x i32> %va, i32 31) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vslti.du(<2 x i64>, i32) + +define <2 x i64> @lsx_vslti_du(<2 x i64> %va) nounwind { +; CHECK-LABEL: lsx_vslti_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vslti.du $vr0, $vr0, 31 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vslti.du(<2 x i64> %va, i32 31) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sra-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sra-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..2a033a21b5651eaeb7bb7c05e67316d3ece7f5e1 --- /dev/null +++ 
b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sra-invalid-imm.ll @@ -0,0 +1,65 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vsrai.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vsrai_b_lo(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsrai.b: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsrai.b(<16 x i8> %va, i32 -1) + ret <16 x i8> %res +} + +define <16 x i8> @lsx_vsrai_b_hi(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsrai.b: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsrai.b(<16 x i8> %va, i32 8) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vsrai.h(<8 x i16>, i32) + +define <8 x i16> @lsx_vsrai_h_lo(<8 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsrai.h: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsrai.h(<8 x i16> %va, i32 -1) + ret <8 x i16> %res +} + +define <8 x i16> @lsx_vsrai_h_hi(<8 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsrai.h: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsrai.h(<8 x i16> %va, i32 16) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vsrai.w(<4 x i32>, i32) + +define <4 x i32> @lsx_vsrai_w_lo(<4 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsrai.w: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsrai.w(<4 x i32> %va, i32 -1) + ret <4 x i32> %res +} + +define <4 x i32> @lsx_vsrai_w_hi(<4 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsrai.w: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsrai.w(<4 x i32> %va, i32 32) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vsrai.d(<2 x i64>, i32) + +define <2 x i64> @lsx_vsrai_d_lo(<2 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsrai.d: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsrai.d(<2 x i64> %va, i32 -1) + ret <2 x i64> %res +} + +define <2 x i64> @lsx_vsrai_d_hi(<2 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsrai.d: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsrai.d(<2 x i64> %va, i32 64) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sra-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sra-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..c3b328145864fbfc82d1796e86c8411cd820a870 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sra-non-imm.ll @@ -0,0 +1,37 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vsrai.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vsrai_b(<16 x i8> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsrai.b(<16 x i8> %va, i32 %b) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vsrai.h(<8 x i16>, i32) + +define <8 x i16> @lsx_vsrai_h(<8 x i16> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsrai.h(<8 x i16> %va, i32 %b) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vsrai.w(<4 x i32>, i32) + +define <4 x i32> @lsx_vsrai_w(<4 x i32> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsrai.w(<4 x i32> %va, i32 %b) + ret <4 x i32> %res +} + +declare <2 x i64> 
@llvm.loongarch.lsx.vsrai.d(<2 x i64>, i32) + +define <2 x i64> @lsx_vsrai_d(<2 x i64> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsrai.d(<2 x i64> %va, i32 %b) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sra.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sra.ll new file mode 100644 index 0000000000000000000000000000000000000000..e85c8464c18e17237635cbc38cc8babd521a5183 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sra.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vsra.b(<16 x i8>, <16 x i8>) + +define <16 x i8> @lsx_vsra_b(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vsra_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsra.b $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsra.b(<16 x i8> %va, <16 x i8> %vb) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vsra.h(<8 x i16>, <8 x i16>) + +define <8 x i16> @lsx_vsra_h(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vsra_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsra.h $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsra.h(<8 x i16> %va, <8 x i16> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vsra.w(<4 x i32>, <4 x i32>) + +define <4 x i32> @lsx_vsra_w(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vsra_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsra.w $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsra.w(<4 x i32> %va, <4 x i32> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vsra.d(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vsra_d(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vsra_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsra.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsra.d(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} + +declare <16 x i8> @llvm.loongarch.lsx.vsrai.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vsrai_b(<16 x i8> %va) nounwind { +; CHECK-LABEL: lsx_vsrai_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrai.b $vr0, $vr0, 7 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsrai.b(<16 x i8> %va, i32 7) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vsrai.h(<8 x i16>, i32) + +define <8 x i16> @lsx_vsrai_h(<8 x i16> %va) nounwind { +; CHECK-LABEL: lsx_vsrai_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrai.h $vr0, $vr0, 15 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsrai.h(<8 x i16> %va, i32 15) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vsrai.w(<4 x i32>, i32) + +define <4 x i32> @lsx_vsrai_w(<4 x i32> %va) nounwind { +; CHECK-LABEL: lsx_vsrai_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrai.w $vr0, $vr0, 31 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsrai.w(<4 x i32> %va, i32 31) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vsrai.d(<2 x i64>, i32) + +define <2 x i64> @lsx_vsrai_d(<2 x i64> %va) nounwind { +; CHECK-LABEL: lsx_vsrai_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrai.d $vr0, $vr0, 63 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsrai.d(<2 x i64> %va, 
i32 63) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sran.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sran.ll new file mode 100644 index 0000000000000000000000000000000000000000..4ffe5a704c2c882082fecbbf5d2f2edf82403ef2 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sran.ll @@ -0,0 +1,38 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vsran.b.h(<8 x i16>, <8 x i16>) + +define <16 x i8> @lsx_vsran_b_h(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vsran_b_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsran.b.h $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsran.b.h(<8 x i16> %va, <8 x i16> %vb) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vsran.h.w(<4 x i32>, <4 x i32>) + +define <8 x i16> @lsx_vsran_h_w(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vsran_h_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsran.h.w $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsran.h.w(<4 x i32> %va, <4 x i32> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vsran.w.d(<2 x i64>, <2 x i64>) + +define <4 x i32> @lsx_vsran_w_d(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vsran_w_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsran.w.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsran.w.d(<2 x i64> %va, <2 x i64> %vb) + ret <4 x i32> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srani-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srani-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..d68064e9b9024c08a9366652f8ac34e5e2ca16a2 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srani-invalid-imm.ll @@ -0,0 +1,65 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vsrani.b.h(<16 x i8>, <16 x i8>, i32) + +define <16 x i8> @lsx_vsrani_b_h_lo(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vsrani.b.h: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsrani.b.h(<16 x i8> %va, <16 x i8> %vb, i32 -1) + ret <16 x i8> %res +} + +define <16 x i8> @lsx_vsrani_b_h_hi(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vsrani.b.h: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsrani.b.h(<16 x i8> %va, <16 x i8> %vb, i32 16) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vsrani.h.w(<8 x i16>, <8 x i16>, i32) + +define <8 x i16> @lsx_vsrani_h_w_lo(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vsrani.h.w: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsrani.h.w(<8 x i16> %va, <8 x i16> %vb, i32 -1) + ret <8 x i16> %res +} + +define <8 x i16> @lsx_vsrani_h_w_hi(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vsrani.h.w: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsrani.h.w(<8 x i16> %va, <8 x i16> %vb, i32 32) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vsrani.w.d(<4 x i32>, <4 x i32>, i32) + +define <4 x i32> @lsx_vsrani_w_d_lo(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vsrani.w.d: argument out of range +entry: + %res = call <4 x 
i32> @llvm.loongarch.lsx.vsrani.w.d(<4 x i32> %va, <4 x i32> %vb, i32 -1) + ret <4 x i32> %res +} + +define <4 x i32> @lsx_vsrani_w_d_hi(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vsrani.w.d: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsrani.w.d(<4 x i32> %va, <4 x i32> %vb, i32 64) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vsrani.d.q(<2 x i64>, <2 x i64>, i32) + +define <2 x i64> @lsx_vsrani_d_q_lo(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vsrani.d.q: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsrani.d.q(<2 x i64> %va, <2 x i64> %vb, i32 -1) + ret <2 x i64> %res +} + +define <2 x i64> @lsx_vsrani_d_q_hi(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vsrani.d.q: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsrani.d.q(<2 x i64> %va, <2 x i64> %vb, i32 128) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srani-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srani-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..38cfde214dc1c2e21286c215288812a2fc69961f --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srani-non-imm.ll @@ -0,0 +1,37 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vsrani.b.h(<16 x i8>, <16 x i8>, i32) + +define <16 x i8> @lsx_vsrani_b_h(<16 x i8> %va, <16 x i8> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsrani.b.h(<16 x i8> %va, <16 x i8> %vb, i32 %c) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vsrani.h.w(<8 x i16>, <8 x i16>, i32) + +define <8 x i16> @lsx_vsrani_h_w(<8 x i16> %va, <8 x i16> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsrani.h.w(<8 x i16> %va, <8 x i16> %vb, i32 %c) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vsrani.w.d(<4 x i32>, <4 x i32>, i32) + +define <4 x i32> @lsx_vsrani_w_d(<4 x i32> %va, <4 x i32> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsrani.w.d(<4 x i32> %va, <4 x i32> %vb, i32 %c) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vsrani.d.q(<2 x i64>, <2 x i64>, i32) + +define <2 x i64> @lsx_vsrani_d_q(<2 x i64> %va, <2 x i64> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsrani.d.q(<2 x i64> %va, <2 x i64> %vb, i32 %c) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srani.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srani.ll new file mode 100644 index 0000000000000000000000000000000000000000..717c641616c8d5235c6f57a664db9ef2fbfac77c --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srani.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vsrani.b.h(<16 x i8>, <16 x i8>, i32) + +define <16 x i8> @lsx_vsrani_b_h(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vsrani_b_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrani.b.h $vr0, $vr1, 15 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> 
@llvm.loongarch.lsx.vsrani.b.h(<16 x i8> %va, <16 x i8> %vb, i32 15) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vsrani.h.w(<8 x i16>, <8 x i16>, i32) + +define <8 x i16> @lsx_vsrani_h_w(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vsrani_h_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrani.h.w $vr0, $vr1, 31 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsrani.h.w(<8 x i16> %va, <8 x i16> %vb, i32 31) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vsrani.w.d(<4 x i32>, <4 x i32>, i32) + +define <4 x i32> @lsx_vsrani_w_d(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vsrani_w_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrani.w.d $vr0, $vr1, 63 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsrani.w.d(<4 x i32> %va, <4 x i32> %vb, i32 63) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vsrani.d.q(<2 x i64>, <2 x i64>, i32) + +define <2 x i64> @lsx_vsrani_d_q(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vsrani_d_q: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrani.d.q $vr0, $vr1, 127 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsrani.d.q(<2 x i64> %va, <2 x i64> %vb, i32 127) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srar-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srar-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..b6c2d70cebbc02caced08a7d6b53e975fe1f8e60 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srar-invalid-imm.ll @@ -0,0 +1,65 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vsrari.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vsrari_b_lo(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsrari.b: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsrari.b(<16 x i8> %va, i32 -1) + ret <16 x i8> %res +} + +define <16 x i8> @lsx_vsrari_b_hi(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsrari.b: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsrari.b(<16 x i8> %va, i32 8) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vsrari.h(<8 x i16>, i32) + +define <8 x i16> @lsx_vsrari_h_lo(<8 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsrari.h: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsrari.h(<8 x i16> %va, i32 -1) + ret <8 x i16> %res +} + +define <8 x i16> @lsx_vsrari_h_hi(<8 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsrari.h: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsrari.h(<8 x i16> %va, i32 16) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vsrari.w(<4 x i32>, i32) + +define <4 x i32> @lsx_vsrari_w_lo(<4 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsrari.w: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsrari.w(<4 x i32> %va, i32 -1) + ret <4 x i32> %res +} + +define <4 x i32> @lsx_vsrari_w_hi(<4 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsrari.w: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsrari.w(<4 x i32> %va, i32 32) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vsrari.d(<2 x i64>, i32) + +define <2 x i64> @lsx_vsrari_d_lo(<2 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsrari.d: argument out of range +entry: + %res = call <2 x i64> 
@llvm.loongarch.lsx.vsrari.d(<2 x i64> %va, i32 -1) + ret <2 x i64> %res +} + +define <2 x i64> @lsx_vsrari_d_hi(<2 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsrari.d: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsrari.d(<2 x i64> %va, i32 64) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srar-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srar-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..2ad8adcd823b6fb03a77a84982f6b1b44d3b6afd --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srar-non-imm.ll @@ -0,0 +1,37 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vsrari.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vsrari_b(<16 x i8> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsrari.b(<16 x i8> %va, i32 %b) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vsrari.h(<8 x i16>, i32) + +define <8 x i16> @lsx_vsrari_h(<8 x i16> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsrari.h(<8 x i16> %va, i32 %b) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vsrari.w(<4 x i32>, i32) + +define <4 x i32> @lsx_vsrari_w(<4 x i32> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsrari.w(<4 x i32> %va, i32 %b) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vsrari.d(<2 x i64>, i32) + +define <2 x i64> @lsx_vsrari_d(<2 x i64> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsrari.d(<2 x i64> %va, i32 %b) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srar.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srar.ll new file mode 100644 index 0000000000000000000000000000000000000000..8b52b7ac9631f4196e65b450c1cf0418cc6b9ab8 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srar.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vsrar.b(<16 x i8>, <16 x i8>) + +define <16 x i8> @lsx_vsrar_b(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vsrar_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrar.b $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsrar.b(<16 x i8> %va, <16 x i8> %vb) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vsrar.h(<8 x i16>, <8 x i16>) + +define <8 x i16> @lsx_vsrar_h(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vsrar_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrar.h $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsrar.h(<8 x i16> %va, <8 x i16> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vsrar.w(<4 x i32>, <4 x i32>) + +define <4 x i32> @lsx_vsrar_w(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vsrar_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrar.w $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsrar.w(<4 x i32> %va, <4 x i32> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vsrar.d(<2 x i64>, <2 x i64>) 
+ +define <2 x i64> @lsx_vsrar_d(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vsrar_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrar.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsrar.d(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} + +declare <16 x i8> @llvm.loongarch.lsx.vsrari.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vsrari_b(<16 x i8> %va) nounwind { +; CHECK-LABEL: lsx_vsrari_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrari.b $vr0, $vr0, 7 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsrari.b(<16 x i8> %va, i32 7) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vsrari.h(<8 x i16>, i32) + +define <8 x i16> @lsx_vsrari_h(<8 x i16> %va) nounwind { +; CHECK-LABEL: lsx_vsrari_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrari.h $vr0, $vr0, 15 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsrari.h(<8 x i16> %va, i32 15) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vsrari.w(<4 x i32>, i32) + +define <4 x i32> @lsx_vsrari_w(<4 x i32> %va) nounwind { +; CHECK-LABEL: lsx_vsrari_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrari.w $vr0, $vr0, 31 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsrari.w(<4 x i32> %va, i32 31) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vsrari.d(<2 x i64>, i32) + +define <2 x i64> @lsx_vsrari_d(<2 x i64> %va) nounwind { +; CHECK-LABEL: lsx_vsrari_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrari.d $vr0, $vr0, 63 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsrari.d(<2 x i64> %va, i32 63) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarn.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarn.ll new file mode 100644 index 0000000000000000000000000000000000000000..d4cdfb5359eaaee9b534de56e5b6e6cb59f3001a --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarn.ll @@ -0,0 +1,38 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vsrarn.b.h(<8 x i16>, <8 x i16>) + +define <16 x i8> @lsx_vsrarn_b_h(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vsrarn_b_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrarn.b.h $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsrarn.b.h(<8 x i16> %va, <8 x i16> %vb) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vsrarn.h.w(<4 x i32>, <4 x i32>) + +define <8 x i16> @lsx_vsrarn_h_w(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vsrarn_h_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrarn.h.w $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsrarn.h.w(<4 x i32> %va, <4 x i32> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vsrarn.w.d(<2 x i64>, <2 x i64>) + +define <4 x i32> @lsx_vsrarn_w_d(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vsrarn_w_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrarn.w.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsrarn.w.d(<2 x i64> %va, <2 x i64> %vb) + ret <4 x i32> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarni-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarni-invalid-imm.ll new file mode 100644 index 
0000000000000000000000000000000000000000..d24cf92a039287c78534aad168f2038b78b0baa0 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarni-invalid-imm.ll @@ -0,0 +1,65 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vsrarni.b.h(<16 x i8>, <16 x i8>, i32) + +define <16 x i8> @lsx_vsrarni_b_h_lo(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vsrarni.b.h: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsrarni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 -1) + ret <16 x i8> %res +} + +define <16 x i8> @lsx_vsrarni_b_h_hi(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vsrarni.b.h: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsrarni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 16) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vsrarni.h.w(<8 x i16>, <8 x i16>, i32) + +define <8 x i16> @lsx_vsrarni_h_w_lo(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vsrarni.h.w: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsrarni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 -1) + ret <8 x i16> %res +} + +define <8 x i16> @lsx_vsrarni_h_w_hi(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vsrarni.h.w: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsrarni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 32) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vsrarni.w.d(<4 x i32>, <4 x i32>, i32) + +define <4 x i32> @lsx_vsrarni_w_d_lo(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vsrarni.w.d: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsrarni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 -1) + ret <4 x i32> %res +} + +define <4 x i32> @lsx_vsrarni_w_d_hi(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vsrarni.w.d: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsrarni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 64) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vsrarni.d.q(<2 x i64>, <2 x i64>, i32) + +define <2 x i64> @lsx_vsrarni_d_q_lo(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vsrarni.d.q: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsrarni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 -1) + ret <2 x i64> %res +} + +define <2 x i64> @lsx_vsrarni_d_q_hi(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vsrarni.d.q: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsrarni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 128) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarni-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarni-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..19de7445cba1cb66ab4c944e7ef4752cab34ee5d --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarni-non-imm.ll @@ -0,0 +1,37 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vsrarni.b.h(<16 x i8>, <16 x i8>, i32) + +define <16 x i8> @lsx_vsrarni_b_h(<16 x i8> %va, <16 x i8> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsrarni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 %c) + ret <16 x i8> %res +} + +declare <8 x i16> 
@llvm.loongarch.lsx.vsrarni.h.w(<8 x i16>, <8 x i16>, i32) + +define <8 x i16> @lsx_vsrarni_h_w(<8 x i16> %va, <8 x i16> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsrarni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 %c) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vsrarni.w.d(<4 x i32>, <4 x i32>, i32) + +define <4 x i32> @lsx_vsrarni_w_d(<4 x i32> %va, <4 x i32> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsrarni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 %c) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vsrarni.d.q(<2 x i64>, <2 x i64>, i32) + +define <2 x i64> @lsx_vsrarni_d_q(<2 x i64> %va, <2 x i64> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsrarni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 %c) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarni.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarni.ll new file mode 100644 index 0000000000000000000000000000000000000000..2253e88372fcbdd38c3a59b520ad8500fff08e73 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srarni.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vsrarni.b.h(<16 x i8>, <16 x i8>, i32) + +define <16 x i8> @lsx_vsrarni_b_h(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vsrarni_b_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrarni.b.h $vr0, $vr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsrarni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 1) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vsrarni.h.w(<8 x i16>, <8 x i16>, i32) + +define <8 x i16> @lsx_vsrarni_h_w(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vsrarni_h_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrarni.h.w $vr0, $vr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsrarni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 1) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vsrarni.w.d(<4 x i32>, <4 x i32>, i32) + +define <4 x i32> @lsx_vsrarni_w_d(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vsrarni_w_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrarni.w.d $vr0, $vr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsrarni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 1) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vsrarni.d.q(<2 x i64>, <2 x i64>, i32) + +define <2 x i64> @lsx_vsrarni_d_q(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vsrarni_d_q: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrarni.d.q $vr0, $vr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsrarni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 1) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srl-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srl-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..3beff790afab6c15cf29318e553578527202fec1 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srl-invalid-imm.ll @@ -0,0 +1,65 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> 
@llvm.loongarch.lsx.vsrli.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vsrli_b_lo(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsrli.b: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsrli.b(<16 x i8> %va, i32 -1) + ret <16 x i8> %res +} + +define <16 x i8> @lsx_vsrli_b_hi(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsrli.b: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsrli.b(<16 x i8> %va, i32 8) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vsrli.h(<8 x i16>, i32) + +define <8 x i16> @lsx_vsrli_h_lo(<8 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsrli.h: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsrli.h(<8 x i16> %va, i32 -1) + ret <8 x i16> %res +} + +define <8 x i16> @lsx_vsrli_h_hi(<8 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsrli.h: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsrli.h(<8 x i16> %va, i32 16) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vsrli.w(<4 x i32>, i32) + +define <4 x i32> @lsx_vsrli_w_lo(<4 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsrli.w: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsrli.w(<4 x i32> %va, i32 -1) + ret <4 x i32> %res +} + +define <4 x i32> @lsx_vsrli_w_hi(<4 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsrli.w: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsrli.w(<4 x i32> %va, i32 32) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vsrli.d(<2 x i64>, i32) + +define <2 x i64> @lsx_vsrli_d_lo(<2 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsrli.d: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsrli.d(<2 x i64> %va, i32 -1) + ret <2 x i64> %res +} + +define <2 x i64> @lsx_vsrli_d_hi(<2 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsrli.d: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsrli.d(<2 x i64> %va, i32 64) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srl-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srl-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..98652aca0d628013eba83149a992fdcf8cf566c7 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srl-non-imm.ll @@ -0,0 +1,37 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vsrli.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vsrli_b(<16 x i8> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsrli.b(<16 x i8> %va, i32 %b) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vsrli.h(<8 x i16>, i32) + +define <8 x i16> @lsx_vsrli_h(<8 x i16> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsrli.h(<8 x i16> %va, i32 %b) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vsrli.w(<4 x i32>, i32) + +define <4 x i32> @lsx_vsrli_w(<4 x i32> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsrli.w(<4 x i32> %va, i32 %b) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vsrli.d(<2 x i64>, i32) + +define <2 x i64> @lsx_vsrli_d(<2 x i64> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res 
= call <2 x i64> @llvm.loongarch.lsx.vsrli.d(<2 x i64> %va, i32 %b) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srl.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srl.ll new file mode 100644 index 0000000000000000000000000000000000000000..1cddd9622233a116a952cd63861035df37f35585 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srl.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vsrl.b(<16 x i8>, <16 x i8>) + +define <16 x i8> @lsx_vsrl_b(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vsrl_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrl.b $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsrl.b(<16 x i8> %va, <16 x i8> %vb) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vsrl.h(<8 x i16>, <8 x i16>) + +define <8 x i16> @lsx_vsrl_h(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vsrl_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrl.h $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsrl.h(<8 x i16> %va, <8 x i16> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vsrl.w(<4 x i32>, <4 x i32>) + +define <4 x i32> @lsx_vsrl_w(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vsrl_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrl.w $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsrl.w(<4 x i32> %va, <4 x i32> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vsrl.d(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vsrl_d(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vsrl_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrl.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsrl.d(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} + +declare <16 x i8> @llvm.loongarch.lsx.vsrli.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vsrli_b(<16 x i8> %va) nounwind { +; CHECK-LABEL: lsx_vsrli_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrli.b $vr0, $vr0, 7 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsrli.b(<16 x i8> %va, i32 7) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vsrli.h(<8 x i16>, i32) + +define <8 x i16> @lsx_vsrli_h(<8 x i16> %va) nounwind { +; CHECK-LABEL: lsx_vsrli_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrli.h $vr0, $vr0, 15 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsrli.h(<8 x i16> %va, i32 15) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vsrli.w(<4 x i32>, i32) + +define <4 x i32> @lsx_vsrli_w(<4 x i32> %va) nounwind { +; CHECK-LABEL: lsx_vsrli_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrli.w $vr0, $vr0, 31 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsrli.w(<4 x i32> %va, i32 31) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vsrli.d(<2 x i64>, i32) + +define <2 x i64> @lsx_vsrli_d(<2 x i64> %va) nounwind { +; CHECK-LABEL: lsx_vsrli_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrli.d $vr0, $vr0, 63 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsrli.d(<2 x i64> %va, i32 63) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srln.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srln.ll new file mode 100644 index 
0000000000000000000000000000000000000000..1c9b23243ffbdba0e762967669d2e39d74c76f12 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srln.ll @@ -0,0 +1,38 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vsrln.b.h(<8 x i16>, <8 x i16>) + +define <16 x i8> @lsx_vsrln_b_h(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vsrln_b_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrln.b.h $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsrln.b.h(<8 x i16> %va, <8 x i16> %vb) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vsrln.h.w(<4 x i32>, <4 x i32>) + +define <8 x i16> @lsx_vsrln_h_w(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vsrln_h_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrln.h.w $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsrln.h.w(<4 x i32> %va, <4 x i32> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vsrln.w.d(<2 x i64>, <2 x i64>) + +define <4 x i32> @lsx_vsrln_w_d(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vsrln_w_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrln.w.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsrln.w.d(<2 x i64> %va, <2 x i64> %vb) + ret <4 x i32> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlni-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlni-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..054c4f393548f9fb4a4a646acef6d3f6f53f674d --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlni-invalid-imm.ll @@ -0,0 +1,65 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vsrlni.b.h(<16 x i8>, <16 x i8>, i32) + +define <16 x i8> @lsx_vsrlni_b_h_lo(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vsrlni.b.h: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsrlni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 -1) + ret <16 x i8> %res +} + +define <16 x i8> @lsx_vsrlni_b_h_hi(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vsrlni.b.h: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsrlni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 16) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vsrlni.h.w(<8 x i16>, <8 x i16>, i32) + +define <8 x i16> @lsx_vsrlni_h_w_lo(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vsrlni.h.w: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsrlni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 -1) + ret <8 x i16> %res +} + +define <8 x i16> @lsx_vsrlni_h_w_hi(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vsrlni.h.w: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsrlni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 32) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vsrlni.w.d(<4 x i32>, <4 x i32>, i32) + +define <4 x i32> @lsx_vsrlni_w_d_lo(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vsrlni.w.d: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsrlni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 -1) + ret <4 x i32> %res +} + +define <4 x i32> @lsx_vsrlni_w_d_hi(<4 x i32> %va, <4 x i32> %vb) nounwind { 
+; CHECK: llvm.loongarch.lsx.vsrlni.w.d: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsrlni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 64) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vsrlni.d.q(<2 x i64>, <2 x i64>, i32) + +define <2 x i64> @lsx_vsrlni_d_q_lo(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vsrlni.d.q: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsrlni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 -1) + ret <2 x i64> %res +} + +define <2 x i64> @lsx_vsrlni_d_q_hi(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vsrlni.d.q: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsrlni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 128) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlni-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlni-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..76341df197fdf57ade83515b39d02032b6291885 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlni-non-imm.ll @@ -0,0 +1,37 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vsrlni.b.h(<16 x i8>, <16 x i8>, i32) + +define <16 x i8> @lsx_vsrlni_b_h(<16 x i8> %va, <16 x i8> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsrlni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 %c) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vsrlni.h.w(<8 x i16>, <8 x i16>, i32) + +define <8 x i16> @lsx_vsrlni_h_w(<8 x i16> %va, <8 x i16> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsrlni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 %c) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vsrlni.w.d(<4 x i32>, <4 x i32>, i32) + +define <4 x i32> @lsx_vsrlni_w_d(<4 x i32> %va, <4 x i32> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsrlni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 %c) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vsrlni.d.q(<2 x i64>, <2 x i64>, i32) + +define <2 x i64> @lsx_vsrlni_d_q(<2 x i64> %va, <2 x i64> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsrlni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 %c) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlni.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlni.ll new file mode 100644 index 0000000000000000000000000000000000000000..6e523efa182405c4980e1ac7effe6c37e18551c0 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlni.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vsrlni.b.h(<16 x i8>, <16 x i8>, i32) + +define <16 x i8> @lsx_vsrlni_b_h(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vsrlni_b_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrlni.b.h $vr0, $vr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsrlni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 1) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vsrlni.h.w(<8 x i16>, <8 x i16>, i32) + +define <8 x i16> 
@lsx_vsrlni_h_w(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vsrlni_h_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrlni.h.w $vr0, $vr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsrlni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 1) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vsrlni.w.d(<4 x i32>, <4 x i32>, i32) + +define <4 x i32> @lsx_vsrlni_w_d(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vsrlni_w_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrlni.w.d $vr0, $vr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsrlni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 1) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vsrlni.d.q(<2 x i64>, <2 x i64>, i32) + +define <2 x i64> @lsx_vsrlni_d_q(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vsrlni_d_q: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrlni.d.q $vr0, $vr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsrlni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 1) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlr-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlr-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..bcbd38e26e5f550de3c68db7fee327f9003f9945 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlr-invalid-imm.ll @@ -0,0 +1,65 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vsrlri.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vsrlri_b_lo(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsrlri.b: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsrlri.b(<16 x i8> %va, i32 -1) + ret <16 x i8> %res +} + +define <16 x i8> @lsx_vsrlri_b_hi(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsrlri.b: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsrlri.b(<16 x i8> %va, i32 8) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vsrlri.h(<8 x i16>, i32) + +define <8 x i16> @lsx_vsrlri_h_lo(<8 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsrlri.h: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsrlri.h(<8 x i16> %va, i32 -1) + ret <8 x i16> %res +} + +define <8 x i16> @lsx_vsrlri_h_hi(<8 x i16> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsrlri.h: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsrlri.h(<8 x i16> %va, i32 16) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vsrlri.w(<4 x i32>, i32) + +define <4 x i32> @lsx_vsrlri_w_lo(<4 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsrlri.w: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsrlri.w(<4 x i32> %va, i32 -1) + ret <4 x i32> %res +} + +define <4 x i32> @lsx_vsrlri_w_hi(<4 x i32> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsrlri.w: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsrlri.w(<4 x i32> %va, i32 32) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vsrlri.d(<2 x i64>, i32) + +define <2 x i64> @lsx_vsrlri_d_lo(<2 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsrlri.d: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsrlri.d(<2 x i64> %va, i32 -1) + ret <2 x i64> %res +} + +define <2 x i64> @lsx_vsrlri_d_hi(<2 x i64> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vsrlri.d: argument out of range +entry: + %res = call 
<2 x i64> @llvm.loongarch.lsx.vsrlri.d(<2 x i64> %va, i32 64) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlr-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlr-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..4862b1546ccf516f22c7203d5e6d2c1e5f31929e --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlr-non-imm.ll @@ -0,0 +1,37 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vsrlri.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vsrlri_b(<16 x i8> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsrlri.b(<16 x i8> %va, i32 %b) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vsrlri.h(<8 x i16>, i32) + +define <8 x i16> @lsx_vsrlri_h(<8 x i16> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsrlri.h(<8 x i16> %va, i32 %b) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vsrlri.w(<4 x i32>, i32) + +define <4 x i32> @lsx_vsrlri_w(<4 x i32> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsrlri.w(<4 x i32> %va, i32 %b) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vsrlri.d(<2 x i64>, i32) + +define <2 x i64> @lsx_vsrlri_d(<2 x i64> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsrlri.d(<2 x i64> %va, i32 %b) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlr.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlr.ll new file mode 100644 index 0000000000000000000000000000000000000000..51638fa1a47f41928cdcfcd682e803ccf8065413 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlr.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vsrlr.b(<16 x i8>, <16 x i8>) + +define <16 x i8> @lsx_vsrlr_b(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vsrlr_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrlr.b $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsrlr.b(<16 x i8> %va, <16 x i8> %vb) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vsrlr.h(<8 x i16>, <8 x i16>) + +define <8 x i16> @lsx_vsrlr_h(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vsrlr_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrlr.h $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsrlr.h(<8 x i16> %va, <8 x i16> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vsrlr.w(<4 x i32>, <4 x i32>) + +define <4 x i32> @lsx_vsrlr_w(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vsrlr_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrlr.w $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsrlr.w(<4 x i32> %va, <4 x i32> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vsrlr.d(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vsrlr_d(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vsrlr_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrlr.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> 
@llvm.loongarch.lsx.vsrlr.d(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} + +declare <16 x i8> @llvm.loongarch.lsx.vsrlri.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vsrlri_b(<16 x i8> %va) nounwind { +; CHECK-LABEL: lsx_vsrlri_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrlri.b $vr0, $vr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsrlri.b(<16 x i8> %va, i32 1) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vsrlri.h(<8 x i16>, i32) + +define <8 x i16> @lsx_vsrlri_h(<8 x i16> %va) nounwind { +; CHECK-LABEL: lsx_vsrlri_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrlri.h $vr0, $vr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsrlri.h(<8 x i16> %va, i32 1) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vsrlri.w(<4 x i32>, i32) + +define <4 x i32> @lsx_vsrlri_w(<4 x i32> %va) nounwind { +; CHECK-LABEL: lsx_vsrlri_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrlri.w $vr0, $vr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsrlri.w(<4 x i32> %va, i32 1) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vsrlri.d(<2 x i64>, i32) + +define <2 x i64> @lsx_vsrlri_d(<2 x i64> %va) nounwind { +; CHECK-LABEL: lsx_vsrlri_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrlri.d $vr0, $vr0, 1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsrlri.d(<2 x i64> %va, i32 1) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrn.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrn.ll new file mode 100644 index 0000000000000000000000000000000000000000..893e5139624112217ca69ea4f13be220b5f80b98 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrn.ll @@ -0,0 +1,38 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vsrlrn.b.h(<8 x i16>, <8 x i16>) + +define <16 x i8> @lsx_vsrlrn_b_h(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vsrlrn_b_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrlrn.b.h $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsrlrn.b.h(<8 x i16> %va, <8 x i16> %vb) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vsrlrn.h.w(<4 x i32>, <4 x i32>) + +define <8 x i16> @lsx_vsrlrn_h_w(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vsrlrn_h_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrlrn.h.w $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsrlrn.h.w(<4 x i32> %va, <4 x i32> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vsrlrn.w.d(<2 x i64>, <2 x i64>) + +define <4 x i32> @lsx_vsrlrn_w_d(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vsrlrn_w_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrlrn.w.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsrlrn.w.d(<2 x i64> %va, <2 x i64> %vb) + ret <4 x i32> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrni-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrni-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..8988ae88f9ebfafedf42e24d0a09f8592d82526e --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrni-invalid-imm.ll @@ -0,0 +1,65 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x 
i8> @llvm.loongarch.lsx.vsrlrni.b.h(<16 x i8>, <16 x i8>, i32) + +define <16 x i8> @lsx_vsrlrni_b_h_lo(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vsrlrni.b.h: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsrlrni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 -1) + ret <16 x i8> %res +} + +define <16 x i8> @lsx_vsrlrni_b_h_hi(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vsrlrni.b.h: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsrlrni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 16) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vsrlrni.h.w(<8 x i16>, <8 x i16>, i32) + +define <8 x i16> @lsx_vsrlrni_h_w_lo(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vsrlrni.h.w: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsrlrni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 -1) + ret <8 x i16> %res +} + +define <8 x i16> @lsx_vsrlrni_h_w_hi(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vsrlrni.h.w: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsrlrni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 32) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vsrlrni.w.d(<4 x i32>, <4 x i32>, i32) + +define <4 x i32> @lsx_vsrlrni_w_d_lo(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vsrlrni.w.d: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsrlrni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 -1) + ret <4 x i32> %res +} + +define <4 x i32> @lsx_vsrlrni_w_d_hi(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vsrlrni.w.d: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsrlrni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 64) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vsrlrni.d.q(<2 x i64>, <2 x i64>, i32) + +define <2 x i64> @lsx_vsrlrni_d_q_lo(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vsrlrni.d.q: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsrlrni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 -1) + ret <2 x i64> %res +} + +define <2 x i64> @lsx_vsrlrni_d_q_hi(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vsrlrni.d.q: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsrlrni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 128) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrni-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrni-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..e5530db56fed9f036567f73d7a1057f21942a1f1 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrni-non-imm.ll @@ -0,0 +1,37 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vsrlrni.b.h(<16 x i8>, <16 x i8>, i32) + +define <16 x i8> @lsx_vsrlrni_b_h(<16 x i8> %va, <16 x i8> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsrlrni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 %c) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vsrlrni.h.w(<8 x i16>, <8 x i16>, i32) + +define <8 x i16> @lsx_vsrlrni_h_w(<8 x i16> %va, <8 x i16> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsrlrni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 
%c) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vsrlrni.w.d(<4 x i32>, <4 x i32>, i32) + +define <4 x i32> @lsx_vsrlrni_w_d(<4 x i32> %va, <4 x i32> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsrlrni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 %c) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vsrlrni.d.q(<2 x i64>, <2 x i64>, i32) + +define <2 x i64> @lsx_vsrlrni_d_q(<2 x i64> %va, <2 x i64> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsrlrni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 %c) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrni.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrni.ll new file mode 100644 index 0000000000000000000000000000000000000000..d1ea450d2237dec2fcd5afc0afb2320b2d4b4ae6 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-srlrni.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vsrlrni.b.h(<16 x i8>, <16 x i8>, i32) + +define <16 x i8> @lsx_vsrlrni_b_h(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vsrlrni_b_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrlrni.b.h $vr0, $vr1, 15 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vsrlrni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 15) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vsrlrni.h.w(<8 x i16>, <8 x i16>, i32) + +define <8 x i16> @lsx_vsrlrni_h_w(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vsrlrni_h_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrlrni.h.w $vr0, $vr1, 31 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsrlrni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 31) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vsrlrni.w.d(<4 x i32>, <4 x i32>, i32) + +define <4 x i32> @lsx_vsrlrni_w_d(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vsrlrni_w_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrlrni.w.d $vr0, $vr1, 63 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsrlrni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 63) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vsrlrni.d.q(<2 x i64>, <2 x i64>, i32) + +define <2 x i64> @lsx_vsrlrni_d_q(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vsrlrni_d_q: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsrlrni.d.q $vr0, $vr1, 127 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsrlrni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 127) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssran.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssran.ll new file mode 100644 index 0000000000000000000000000000000000000000..cecccbb730c950d35b2d769fe194e926bb7a3501 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssran.ll @@ -0,0 +1,74 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vssran.b.h(<8 x i16>, <8 x i16>) + +define <16 x i8> @lsx_vssran_b_h(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vssran_b_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vssran.b.h $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + 
%res = call <16 x i8> @llvm.loongarch.lsx.vssran.b.h(<8 x i16> %va, <8 x i16> %vb) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vssran.h.w(<4 x i32>, <4 x i32>) + +define <8 x i16> @lsx_vssran_h_w(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vssran_h_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vssran.h.w $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vssran.h.w(<4 x i32> %va, <4 x i32> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vssran.w.d(<2 x i64>, <2 x i64>) + +define <4 x i32> @lsx_vssran_w_d(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vssran_w_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vssran.w.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vssran.w.d(<2 x i64> %va, <2 x i64> %vb) + ret <4 x i32> %res +} + +declare <16 x i8> @llvm.loongarch.lsx.vssran.bu.h(<8 x i16>, <8 x i16>) + +define <16 x i8> @lsx_vssran_bu_h(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vssran_bu_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vssran.bu.h $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vssran.bu.h(<8 x i16> %va, <8 x i16> %vb) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vssran.hu.w(<4 x i32>, <4 x i32>) + +define <8 x i16> @lsx_vssran_hu_w(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vssran_hu_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vssran.hu.w $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vssran.hu.w(<4 x i32> %va, <4 x i32> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vssran.wu.d(<2 x i64>, <2 x i64>) + +define <4 x i32> @lsx_vssran_wu_d(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vssran_wu_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vssran.wu.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vssran.wu.d(<2 x i64> %va, <2 x i64> %vb) + ret <4 x i32> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrani-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrani-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..f7817921ebebcbe513299b0b3989bcc184c72a24 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrani-invalid-imm.ll @@ -0,0 +1,129 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vssrani.b.h(<16 x i8>, <16 x i8>, i32) + +define <16 x i8> @lsx_vssrani_b_h_lo(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vssrani.b.h: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vssrani.b.h(<16 x i8> %va, <16 x i8> %vb, i32 -1) + ret <16 x i8> %res +} + +define <16 x i8> @lsx_vssrani_b_h_hi(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vssrani.b.h: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vssrani.b.h(<16 x i8> %va, <16 x i8> %vb, i32 16) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vssrani.h.w(<8 x i16>, <8 x i16>, i32) + +define <8 x i16> @lsx_vssrani_h_w_lo(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vssrani.h.w: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vssrani.h.w(<8 x i16> %va, <8 x i16> %vb, i32 -1) + ret <8 x i16> %res +} + +define <8 x i16> @lsx_vssrani_h_w_hi(<8 x i16> %va, <8 x i16> %vb) nounwind 
{ +; CHECK: llvm.loongarch.lsx.vssrani.h.w: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vssrani.h.w(<8 x i16> %va, <8 x i16> %vb, i32 32) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vssrani.w.d(<4 x i32>, <4 x i32>, i32) + +define <4 x i32> @lsx_vssrani_w_d_lo(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vssrani.w.d: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vssrani.w.d(<4 x i32> %va, <4 x i32> %vb, i32 -1) + ret <4 x i32> %res +} + +define <4 x i32> @lsx_vssrani_w_d_hi(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vssrani.w.d: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vssrani.w.d(<4 x i32> %va, <4 x i32> %vb, i32 64) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vssrani.d.q(<2 x i64>, <2 x i64>, i32) + +define <2 x i64> @lsx_vssrani_d_q_lo(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vssrani.d.q: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vssrani.d.q(<2 x i64> %va, <2 x i64> %vb, i32 -1) + ret <2 x i64> %res +} + +define <2 x i64> @lsx_vssrani_d_q_hi(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vssrani.d.q: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vssrani.d.q(<2 x i64> %va, <2 x i64> %vb, i32 128) + ret <2 x i64> %res +} + +declare <16 x i8> @llvm.loongarch.lsx.vssrani.bu.h(<16 x i8>, <16 x i8>, i32) + +define <16 x i8> @lsx_vssrani_bu_h_lo(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vssrani.bu.h: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vssrani.bu.h(<16 x i8> %va, <16 x i8> %vb, i32 -1) + ret <16 x i8> %res +} + +define <16 x i8> @lsx_vssrani_bu_h_hi(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vssrani.bu.h: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vssrani.bu.h(<16 x i8> %va, <16 x i8> %vb, i32 16) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vssrani.hu.w(<8 x i16>, <8 x i16>, i32) + +define <8 x i16> @lsx_vssrani_hu_w_lo(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vssrani.hu.w: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vssrani.hu.w(<8 x i16> %va, <8 x i16> %vb, i32 -1) + ret <8 x i16> %res +} + +define <8 x i16> @lsx_vssrani_hu_w_hi(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vssrani.hu.w: argument out of range +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vssrani.hu.w(<8 x i16> %va, <8 x i16> %vb, i32 32) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vssrani.wu.d(<4 x i32>, <4 x i32>, i32) + +define <4 x i32> @lsx_vssrani_wu_d_lo(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vssrani.wu.d: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vssrani.wu.d(<4 x i32> %va, <4 x i32> %vb, i32 -1) + ret <4 x i32> %res +} + +define <4 x i32> @lsx_vssrani_wu_d_hi(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vssrani.wu.d: argument out of range +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vssrani.wu.d(<4 x i32> %va, <4 x i32> %vb, i32 64) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vssrani.du.q(<2 x i64>, <2 x i64>, i32) + +define <2 x i64> @lsx_vssrani_du_q_lo(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vssrani.du.q: argument out of range 
+entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vssrani.du.q(<2 x i64> %va, <2 x i64> %vb, i32 -1) + ret <2 x i64> %res +} + +define <2 x i64> @lsx_vssrani_du_q_hi(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK: llvm.loongarch.lsx.vssrani.du.q: argument out of range +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vssrani.du.q(<2 x i64> %va, <2 x i64> %vb, i32 128) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrani-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrani-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..a80ede9c5243eebee161593644bf4e2bc12bd106 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrani-non-imm.ll @@ -0,0 +1,73 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vssrani.b.h(<16 x i8>, <16 x i8>, i32) + +define <16 x i8> @lsx_vssrani_b_h(<16 x i8> %va, <16 x i8> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vssrani.b.h(<16 x i8> %va, <16 x i8> %vb, i32 %c) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vssrani.h.w(<8 x i16>, <8 x i16>, i32) + +define <8 x i16> @lsx_vssrani_h_w(<8 x i16> %va, <8 x i16> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vssrani.h.w(<8 x i16> %va, <8 x i16> %vb, i32 %c) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vssrani.w.d(<4 x i32>, <4 x i32>, i32) + +define <4 x i32> @lsx_vssrani_w_d(<4 x i32> %va, <4 x i32> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vssrani.w.d(<4 x i32> %va, <4 x i32> %vb, i32 %c) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vssrani.d.q(<2 x i64>, <2 x i64>, i32) + +define <2 x i64> @lsx_vssrani_d_q(<2 x i64> %va, <2 x i64> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vssrani.d.q(<2 x i64> %va, <2 x i64> %vb, i32 %c) + ret <2 x i64> %res +} + +declare <16 x i8> @llvm.loongarch.lsx.vssrani.bu.h(<16 x i8>, <16 x i8>, i32) + +define <16 x i8> @lsx_vssrani_bu_h(<16 x i8> %va, <16 x i8> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vssrani.bu.h(<16 x i8> %va, <16 x i8> %vb, i32 %c) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vssrani.hu.w(<8 x i16>, <8 x i16>, i32) + +define <8 x i16> @lsx_vssrani_hu_w(<8 x i16> %va, <8 x i16> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vssrani.hu.w(<8 x i16> %va, <8 x i16> %vb, i32 %c) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vssrani.wu.d(<4 x i32>, <4 x i32>, i32) + +define <4 x i32> @lsx_vssrani_wu_d(<4 x i32> %va, <4 x i32> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vssrani.wu.d(<4 x i32> %va, <4 x i32> %vb, i32 %c) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vssrani.du.q(<2 x i64>, <2 x i64>, i32) + +define <2 x i64> @lsx_vssrani_du_q(<2 x i64> %va, <2 x i64> %vb, i32 %c) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vssrani.du.q(<2 x i64> %va, <2 x i64> 
%vb, i32 %c) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrani.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrani.ll new file mode 100644 index 0000000000000000000000000000000000000000..57b8eb16986660f68e4017420b7af3a09b21e141 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrani.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vssrani.b.h(<16 x i8>, <16 x i8>, i32) + +define <16 x i8> @lsx_vssrani_b_h(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vssrani_b_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vssrani.b.h $vr0, $vr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vssrani.b.h(<16 x i8> %va, <16 x i8> %vb, i32 1) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vssrani.h.w(<8 x i16>, <8 x i16>, i32) + +define <8 x i16> @lsx_vssrani_h_w(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vssrani_h_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vssrani.h.w $vr0, $vr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vssrani.h.w(<8 x i16> %va, <8 x i16> %vb, i32 1) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vssrani.w.d(<4 x i32>, <4 x i32>, i32) + +define <4 x i32> @lsx_vssrani_w_d(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vssrani_w_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vssrani.w.d $vr0, $vr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vssrani.w.d(<4 x i32> %va, <4 x i32> %vb, i32 1) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vssrani.d.q(<2 x i64>, <2 x i64>, i32) + +define <2 x i64> @lsx_vssrani_d_q(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vssrani_d_q: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vssrani.d.q $vr0, $vr1, 1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vssrani.d.q(<2 x i64> %va, <2 x i64> %vb, i32 1) + ret <2 x i64> %res +} + +declare <16 x i8> @llvm.loongarch.lsx.vssrani.bu.h(<16 x i8>, <16 x i8>, i32) + +define <16 x i8> @lsx_vssrani_bu_h(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vssrani_bu_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vssrani.bu.h $vr0, $vr1, 15 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vssrani.bu.h(<16 x i8> %va, <16 x i8> %vb, i32 15) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vssrani.hu.w(<8 x i16>, <8 x i16>, i32) + +define <8 x i16> @lsx_vssrani_hu_w(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vssrani_hu_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vssrani.hu.w $vr0, $vr1, 31 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vssrani.hu.w(<8 x i16> %va, <8 x i16> %vb, i32 31) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vssrani.wu.d(<4 x i32>, <4 x i32>, i32) + +define <4 x i32> @lsx_vssrani_wu_d(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vssrani_wu_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vssrani.wu.d $vr0, $vr1, 63 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vssrani.wu.d(<4 x i32> %va, <4 x i32> %vb, i32 63) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vssrani.du.q(<2 x i64>, <2 x i64>, i32) + +define <2 x i64> @lsx_vssrani_du_q(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vssrani_du_q: +; CHECK: 
# %bb.0: # %entry +; CHECK-NEXT: vssrani.du.q $vr0, $vr1, 127 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vssrani.du.q(<2 x i64> %va, <2 x i64> %vb, i32 127) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarn.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarn.ll new file mode 100644 index 0000000000000000000000000000000000000000..c6b7d9ec8e1d60134ca91bfaf58133540a9be477 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarn.ll @@ -0,0 +1,74 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vssrarn.b.h(<8 x i16>, <8 x i16>) + +define <16 x i8> @lsx_vssrarn_b_h(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vssrarn_b_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vssrarn.b.h $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vssrarn.b.h(<8 x i16> %va, <8 x i16> %vb) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vssrarn.h.w(<4 x i32>, <4 x i32>) + +define <8 x i16> @lsx_vssrarn_h_w(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vssrarn_h_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vssrarn.h.w $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vssrarn.h.w(<4 x i32> %va, <4 x i32> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vssrarn.w.d(<2 x i64>, <2 x i64>) + +define <4 x i32> @lsx_vssrarn_w_d(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vssrarn_w_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vssrarn.w.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vssrarn.w.d(<2 x i64> %va, <2 x i64> %vb) + ret <4 x i32> %res +} + +declare <16 x i8> @llvm.loongarch.lsx.vssrarn.bu.h(<8 x i16>, <8 x i16>) + +define <16 x i8> @lsx_vssrarn_bu_h(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vssrarn_bu_h: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vssrarn.bu.h $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vssrarn.bu.h(<8 x i16> %va, <8 x i16> %vb) + ret <16 x i8> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vssrarn.hu.w(<4 x i32>, <4 x i32>) + +define <8 x i16> @lsx_vssrarn_hu_w(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vssrarn_hu_w: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vssrarn.hu.w $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vssrarn.hu.w(<4 x i32> %va, <4 x i32> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vssrarn.wu.d(<2 x i64>, <2 x i64>) + +define <4 x i32> @lsx_vssrarn_wu_d(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vssrarn_wu_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vssrarn.wu.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vssrarn.wu.d(<2 x i64> %va, <2 x i64> %vb) + ret <4 x i32> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarni-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarni-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..4edda8c0a24adb561907c1c4ce4303b3535daf64 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarni-invalid-imm.ll @@ -0,0 +1,129 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vssrarni.b.h(<16 x 
i8>, <16 x i8>, i32)
+
+define <16 x i8> @lsx_vssrarni_b_h_lo(<16 x i8> %va, <16 x i8> %vb) nounwind {
+; CHECK: llvm.loongarch.lsx.vssrarni.b.h: argument out of range
+entry:
+  %res = call <16 x i8> @llvm.loongarch.lsx.vssrarni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 -1)
+  ret <16 x i8> %res
+}
+
+define <16 x i8> @lsx_vssrarni_b_h_hi(<16 x i8> %va, <16 x i8> %vb) nounwind {
+; CHECK: llvm.loongarch.lsx.vssrarni.b.h: argument out of range
+entry:
+  %res = call <16 x i8> @llvm.loongarch.lsx.vssrarni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 16)
+  ret <16 x i8> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vssrarni.h.w(<8 x i16>, <8 x i16>, i32)
+
+define <8 x i16> @lsx_vssrarni_h_w_lo(<8 x i16> %va, <8 x i16> %vb) nounwind {
+; CHECK: llvm.loongarch.lsx.vssrarni.h.w: argument out of range
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vssrarni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 -1)
+  ret <8 x i16> %res
+}
+
+define <8 x i16> @lsx_vssrarni_h_w_hi(<8 x i16> %va, <8 x i16> %vb) nounwind {
+; CHECK: llvm.loongarch.lsx.vssrarni.h.w: argument out of range
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vssrarni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 32)
+  ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vssrarni.w.d(<4 x i32>, <4 x i32>, i32)
+
+define <4 x i32> @lsx_vssrarni_w_d_lo(<4 x i32> %va, <4 x i32> %vb) nounwind {
+; CHECK: llvm.loongarch.lsx.vssrarni.w.d: argument out of range
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vssrarni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 -1)
+  ret <4 x i32> %res
+}
+
+define <4 x i32> @lsx_vssrarni_w_d_hi(<4 x i32> %va, <4 x i32> %vb) nounwind {
+; CHECK: llvm.loongarch.lsx.vssrarni.w.d: argument out of range
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vssrarni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 64)
+  ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vssrarni.d.q(<2 x i64>, <2 x i64>, i32)
+
+define <2 x i64> @lsx_vssrarni_d_q_lo(<2 x i64> %va, <2 x i64> %vb) nounwind {
+; CHECK: llvm.loongarch.lsx.vssrarni.d.q: argument out of range
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vssrarni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 -1)
+  ret <2 x i64> %res
+}
+
+define <2 x i64> @lsx_vssrarni_d_q_hi(<2 x i64> %va, <2 x i64> %vb) nounwind {
+; CHECK: llvm.loongarch.lsx.vssrarni.d.q: argument out of range
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vssrarni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 128)
+  ret <2 x i64> %res
+}
+
+declare <16 x i8> @llvm.loongarch.lsx.vssrarni.bu.h(<16 x i8>, <16 x i8>, i32)
+
+define <16 x i8> @lsx_vssrarni_bu_h_lo(<16 x i8> %va, <16 x i8> %vb) nounwind {
+; CHECK: llvm.loongarch.lsx.vssrarni.bu.h: argument out of range
+entry:
+  %res = call <16 x i8> @llvm.loongarch.lsx.vssrarni.bu.h(<16 x i8> %va, <16 x i8> %vb, i32 -1)
+  ret <16 x i8> %res
+}
+
+define <16 x i8> @lsx_vssrarni_bu_h_hi(<16 x i8> %va, <16 x i8> %vb) nounwind {
+; CHECK: llvm.loongarch.lsx.vssrarni.bu.h: argument out of range
+entry:
+  %res = call <16 x i8> @llvm.loongarch.lsx.vssrarni.bu.h(<16 x i8> %va, <16 x i8> %vb, i32 16)
+  ret <16 x i8> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vssrarni.hu.w(<8 x i16>, <8 x i16>, i32)
+
+define <8 x i16> @lsx_vssrarni_hu_w_lo(<8 x i16> %va, <8 x i16> %vb) nounwind {
+; CHECK: llvm.loongarch.lsx.vssrarni.hu.w: argument out of range
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vssrarni.hu.w(<8 x i16> %va, <8 x i16> %vb, i32 -1)
+  ret <8 x i16> %res
+}
+
+define <8 x i16> @lsx_vssrarni_hu_w_hi(<8 x i16> %va, <8 x i16> %vb) nounwind {
+; CHECK: llvm.loongarch.lsx.vssrarni.hu.w: argument out of range
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vssrarni.hu.w(<8 x i16> %va, <8 x i16> %vb, i32 32)
+  ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vssrarni.wu.d(<4 x i32>, <4 x i32>, i32)
+
+define <4 x i32> @lsx_vssrarni_wu_d_lo(<4 x i32> %va, <4 x i32> %vb) nounwind {
+; CHECK: llvm.loongarch.lsx.vssrarni.wu.d: argument out of range
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vssrarni.wu.d(<4 x i32> %va, <4 x i32> %vb, i32 -1)
+  ret <4 x i32> %res
+}
+
+define <4 x i32> @lsx_vssrarni_wu_d_hi(<4 x i32> %va, <4 x i32> %vb) nounwind {
+; CHECK: llvm.loongarch.lsx.vssrarni.wu.d: argument out of range
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vssrarni.wu.d(<4 x i32> %va, <4 x i32> %vb, i32 64)
+  ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vssrarni.du.q(<2 x i64>, <2 x i64>, i32)
+
+define <2 x i64> @lsx_vssrarni_du_q_lo(<2 x i64> %va, <2 x i64> %vb) nounwind {
+; CHECK: llvm.loongarch.lsx.vssrarni.du.q: argument out of range
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vssrarni.du.q(<2 x i64> %va, <2 x i64> %vb, i32 -1)
+  ret <2 x i64> %res
+}
+
+define <2 x i64> @lsx_vssrarni_du_q_hi(<2 x i64> %va, <2 x i64> %vb) nounwind {
+; CHECK: llvm.loongarch.lsx.vssrarni.du.q: argument out of range
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vssrarni.du.q(<2 x i64> %va, <2 x i64> %vb, i32 128)
+  ret <2 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarni-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarni-non-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..a77e6e764c9d4d3a92c12f3c40eb74b28bb78cf4
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarni-non-imm.ll
@@ -0,0 +1,73 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s
+
+declare <16 x i8> @llvm.loongarch.lsx.vssrarni.b.h(<16 x i8>, <16 x i8>, i32)
+
+define <16 x i8> @lsx_vssrarni_b_h(<16 x i8> %va, <16 x i8> %vb, i32 %c) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <16 x i8> @llvm.loongarch.lsx.vssrarni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 %c)
+  ret <16 x i8> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vssrarni.h.w(<8 x i16>, <8 x i16>, i32)
+
+define <8 x i16> @lsx_vssrarni_h_w(<8 x i16> %va, <8 x i16> %vb, i32 %c) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vssrarni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 %c)
+  ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vssrarni.w.d(<4 x i32>, <4 x i32>, i32)
+
+define <4 x i32> @lsx_vssrarni_w_d(<4 x i32> %va, <4 x i32> %vb, i32 %c) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vssrarni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 %c)
+  ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vssrarni.d.q(<2 x i64>, <2 x i64>, i32)
+
+define <2 x i64> @lsx_vssrarni_d_q(<2 x i64> %va, <2 x i64> %vb, i32 %c) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vssrarni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 %c)
+  ret <2 x i64> %res
+}
+
+declare <16 x i8> @llvm.loongarch.lsx.vssrarni.bu.h(<16 x i8>, <16 x i8>, i32)
+
+define <16 x i8> @lsx_vssrarni_bu_h(<16 x i8> %va, <16 x i8> %vb, i32 %c) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <16 x i8> @llvm.loongarch.lsx.vssrarni.bu.h(<16 x i8> %va, <16 x i8> %vb, i32 %c)
+  ret <16 x i8> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vssrarni.hu.w(<8 x i16>, <8 x i16>, i32)
+
+define <8 x i16> @lsx_vssrarni_hu_w(<8 x i16> %va, <8 x i16> %vb, i32 %c) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vssrarni.hu.w(<8 x i16> %va, <8 x i16> %vb, i32 %c)
+  ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vssrarni.wu.d(<4 x i32>, <4 x i32>, i32)
+
+define <4 x i32> @lsx_vssrarni_wu_d(<4 x i32> %va, <4 x i32> %vb, i32 %c) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vssrarni.wu.d(<4 x i32> %va, <4 x i32> %vb, i32 %c)
+  ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vssrarni.du.q(<2 x i64>, <2 x i64>, i32)
+
+define <2 x i64> @lsx_vssrarni_du_q(<2 x i64> %va, <2 x i64> %vb, i32 %c) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vssrarni.du.q(<2 x i64> %va, <2 x i64> %vb, i32 %c)
+  ret <2 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarni.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarni.ll
new file mode 100644
index 0000000000000000000000000000000000000000..1a2e91962ac3b6ec9541dd3c1bf856cf6f36f342
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrarni.ll
@@ -0,0 +1,98 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+declare <16 x i8> @llvm.loongarch.lsx.vssrarni.b.h(<16 x i8>, <16 x i8>, i32)
+
+define <16 x i8> @lsx_vssrarni_b_h(<16 x i8> %va, <16 x i8> %vb) nounwind {
+; CHECK-LABEL: lsx_vssrarni_b_h:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vssrarni.b.h $vr0, $vr1, 1
+; CHECK-NEXT: ret
+entry:
+  %res = call <16 x i8> @llvm.loongarch.lsx.vssrarni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 1)
+  ret <16 x i8> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vssrarni.h.w(<8 x i16>, <8 x i16>, i32)
+
+define <8 x i16> @lsx_vssrarni_h_w(<8 x i16> %va, <8 x i16> %vb) nounwind {
+; CHECK-LABEL: lsx_vssrarni_h_w:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vssrarni.h.w $vr0, $vr1, 1
+; CHECK-NEXT: ret
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vssrarni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 1)
+  ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vssrarni.w.d(<4 x i32>, <4 x i32>, i32)
+
+define <4 x i32> @lsx_vssrarni_w_d(<4 x i32> %va, <4 x i32> %vb) nounwind {
+; CHECK-LABEL: lsx_vssrarni_w_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vssrarni.w.d $vr0, $vr1, 1
+; CHECK-NEXT: ret
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vssrarni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 1)
+  ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vssrarni.d.q(<2 x i64>, <2 x i64>, i32)
+
+define <2 x i64> @lsx_vssrarni_d_q(<2 x i64> %va, <2 x i64> %vb) nounwind {
+; CHECK-LABEL: lsx_vssrarni_d_q:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vssrarni.d.q $vr0, $vr1, 1
+; CHECK-NEXT: ret
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vssrarni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 1)
+  ret <2 x i64> %res
+}
+
+declare <16 x i8> @llvm.loongarch.lsx.vssrarni.bu.h(<16 x i8>, <16 x i8>, i32)
+
+define <16 x i8> @lsx_vssrarni_bu_h(<16 x i8> %va, <16 x i8> %vb) nounwind {
+; CHECK-LABEL: lsx_vssrarni_bu_h:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vssrarni.bu.h $vr0, $vr1, 15
+; CHECK-NEXT: ret
+entry:
+  %res = call <16 x i8> @llvm.loongarch.lsx.vssrarni.bu.h(<16 x i8> %va, <16 x i8> %vb, i32 15)
+  ret <16 x i8> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vssrarni.hu.w(<8 x i16>, <8 x i16>, i32)
+
+define <8 x i16> @lsx_vssrarni_hu_w(<8 x i16> %va, <8 x i16> %vb) nounwind {
+; CHECK-LABEL: lsx_vssrarni_hu_w:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vssrarni.hu.w $vr0, $vr1, 31
+; CHECK-NEXT: ret
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vssrarni.hu.w(<8 x i16> %va, <8 x i16> %vb, i32 31)
+  ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vssrarni.wu.d(<4 x i32>, <4 x i32>, i32)
+
+define <4 x i32> @lsx_vssrarni_wu_d(<4 x i32> %va, <4 x i32> %vb) nounwind {
+; CHECK-LABEL: lsx_vssrarni_wu_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vssrarni.wu.d $vr0, $vr1, 63
+; CHECK-NEXT: ret
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vssrarni.wu.d(<4 x i32> %va, <4 x i32> %vb, i32 63)
+  ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vssrarni.du.q(<2 x i64>, <2 x i64>, i32)
+
+define <2 x i64> @lsx_vssrarni_du_q(<2 x i64> %va, <2 x i64> %vb) nounwind {
+; CHECK-LABEL: lsx_vssrarni_du_q:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vssrarni.du.q $vr0, $vr1, 127
+; CHECK-NEXT: ret
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vssrarni.du.q(<2 x i64> %va, <2 x i64> %vb, i32 127)
+  ret <2 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrln.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrln.ll
new file mode 100644
index 0000000000000000000000000000000000000000..697ccc3962a81b13aab4300baa411b316da9f7d1
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrln.ll
@@ -0,0 +1,74 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+declare <16 x i8> @llvm.loongarch.lsx.vssrln.b.h(<8 x i16>, <8 x i16>)
+
+define <16 x i8> @lsx_vssrln_b_h(<8 x i16> %va, <8 x i16> %vb) nounwind {
+; CHECK-LABEL: lsx_vssrln_b_h:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vssrln.b.h $vr0, $vr0, $vr1
+; CHECK-NEXT: ret
+entry:
+  %res = call <16 x i8> @llvm.loongarch.lsx.vssrln.b.h(<8 x i16> %va, <8 x i16> %vb)
+  ret <16 x i8> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vssrln.h.w(<4 x i32>, <4 x i32>)
+
+define <8 x i16> @lsx_vssrln_h_w(<4 x i32> %va, <4 x i32> %vb) nounwind {
+; CHECK-LABEL: lsx_vssrln_h_w:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vssrln.h.w $vr0, $vr0, $vr1
+; CHECK-NEXT: ret
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vssrln.h.w(<4 x i32> %va, <4 x i32> %vb)
+  ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vssrln.w.d(<2 x i64>, <2 x i64>)
+
+define <4 x i32> @lsx_vssrln_w_d(<2 x i64> %va, <2 x i64> %vb) nounwind {
+; CHECK-LABEL: lsx_vssrln_w_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vssrln.w.d $vr0, $vr0, $vr1
+; CHECK-NEXT: ret
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vssrln.w.d(<2 x i64> %va, <2 x i64> %vb)
+  ret <4 x i32> %res
+}
+
+declare <16 x i8> @llvm.loongarch.lsx.vssrln.bu.h(<8 x i16>, <8 x i16>)
+
+define <16 x i8> @lsx_vssrln_bu_h(<8 x i16> %va, <8 x i16> %vb) nounwind {
+; CHECK-LABEL: lsx_vssrln_bu_h:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vssrln.bu.h $vr0, $vr0, $vr1
+; CHECK-NEXT: ret
+entry:
+  %res = call <16 x i8> @llvm.loongarch.lsx.vssrln.bu.h(<8 x i16> %va, <8 x i16> %vb)
+  ret <16 x i8> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vssrln.hu.w(<4 x i32>, <4 x i32>)
+
+define <8 x i16> @lsx_vssrln_hu_w(<4 x i32> %va, <4 x i32> %vb) nounwind {
+; CHECK-LABEL: lsx_vssrln_hu_w:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vssrln.hu.w $vr0, $vr0, $vr1
+; CHECK-NEXT: ret
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vssrln.hu.w(<4 x i32> %va, <4 x i32> %vb)
+  ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vssrln.wu.d(<2 x i64>, <2 x i64>)
+
+define <4 x i32> @lsx_vssrln_wu_d(<2 x i64> %va, <2 x i64> %vb) nounwind {
+; CHECK-LABEL: lsx_vssrln_wu_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vssrln.wu.d $vr0, $vr0, $vr1
+; CHECK-NEXT: ret
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vssrln.wu.d(<2 x i64> %va, <2 x i64> %vb)
+  ret <4 x i32> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlni-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlni-invalid-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..6218af1fa773f3a63f54e4a39530411c7da3a9aa
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlni-invalid-imm.ll
@@ -0,0 +1,129 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s
+
+declare <16 x i8> @llvm.loongarch.lsx.vssrlni.b.h(<16 x i8>, <16 x i8>, i32)
+
+define <16 x i8> @lsx_vssrlni_b_h_lo(<16 x i8> %va, <16 x i8> %vb) nounwind {
+; CHECK: llvm.loongarch.lsx.vssrlni.b.h: argument out of range
+entry:
+  %res = call <16 x i8> @llvm.loongarch.lsx.vssrlni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 -1)
+  ret <16 x i8> %res
+}
+
+define <16 x i8> @lsx_vssrlni_b_h_hi(<16 x i8> %va, <16 x i8> %vb) nounwind {
+; CHECK: llvm.loongarch.lsx.vssrlni.b.h: argument out of range
+entry:
+  %res = call <16 x i8> @llvm.loongarch.lsx.vssrlni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 16)
+  ret <16 x i8> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vssrlni.h.w(<8 x i16>, <8 x i16>, i32)
+
+define <8 x i16> @lsx_vssrlni_h_w_lo(<8 x i16> %va, <8 x i16> %vb) nounwind {
+; CHECK: llvm.loongarch.lsx.vssrlni.h.w: argument out of range
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vssrlni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 -1)
+  ret <8 x i16> %res
+}
+
+define <8 x i16> @lsx_vssrlni_h_w_hi(<8 x i16> %va, <8 x i16> %vb) nounwind {
+; CHECK: llvm.loongarch.lsx.vssrlni.h.w: argument out of range
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vssrlni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 32)
+  ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vssrlni.w.d(<4 x i32>, <4 x i32>, i32)
+
+define <4 x i32> @lsx_vssrlni_w_d_lo(<4 x i32> %va, <4 x i32> %vb) nounwind {
+; CHECK: llvm.loongarch.lsx.vssrlni.w.d: argument out of range
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vssrlni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 -1)
+  ret <4 x i32> %res
+}
+
+define <4 x i32> @lsx_vssrlni_w_d_hi(<4 x i32> %va, <4 x i32> %vb) nounwind {
+; CHECK: llvm.loongarch.lsx.vssrlni.w.d: argument out of range
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vssrlni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 64)
+  ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vssrlni.d.q(<2 x i64>, <2 x i64>, i32)
+
+define <2 x i64> @lsx_vssrlni_d_q_lo(<2 x i64> %va, <2 x i64> %vb) nounwind {
+; CHECK: llvm.loongarch.lsx.vssrlni.d.q: argument out of range
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vssrlni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 -1)
+  ret <2 x i64> %res
+}
+
+define <2 x i64> @lsx_vssrlni_d_q_hi(<2 x i64> %va, <2 x i64> %vb) nounwind {
+; CHECK: llvm.loongarch.lsx.vssrlni.d.q: argument out of range
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vssrlni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 128)
+  ret <2 x i64> %res
+}
+
+declare <16 x i8> @llvm.loongarch.lsx.vssrlni.bu.h(<16 x i8>, <16 x i8>, i32)
+
+define <16 x i8> @lsx_vssrlni_bu_h_lo(<16 x i8> %va, <16 x i8> %vb) nounwind {
+; CHECK: llvm.loongarch.lsx.vssrlni.bu.h: argument out of range
+entry:
+  %res = call <16 x i8> @llvm.loongarch.lsx.vssrlni.bu.h(<16 x i8> %va, <16 x i8> %vb, i32 -1)
+  ret <16 x i8> %res
+}
+
+define <16 x i8> @lsx_vssrlni_bu_h_hi(<16 x i8> %va, <16 x i8> %vb) nounwind {
+; CHECK: llvm.loongarch.lsx.vssrlni.bu.h: argument out of range
+entry:
+  %res = call <16 x i8> @llvm.loongarch.lsx.vssrlni.bu.h(<16 x i8> %va, <16 x i8> %vb, i32 16)
+  ret <16 x i8> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vssrlni.hu.w(<8 x i16>, <8 x i16>, i32)
+
+define <8 x i16> @lsx_vssrlni_hu_w_lo(<8 x i16> %va, <8 x i16> %vb) nounwind {
+; CHECK: llvm.loongarch.lsx.vssrlni.hu.w: argument out of range
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vssrlni.hu.w(<8 x i16> %va, <8 x i16> %vb, i32 -1)
+  ret <8 x i16> %res
+}
+
+define <8 x i16> @lsx_vssrlni_hu_w_hi(<8 x i16> %va, <8 x i16> %vb) nounwind {
+; CHECK: llvm.loongarch.lsx.vssrlni.hu.w: argument out of range
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vssrlni.hu.w(<8 x i16> %va, <8 x i16> %vb, i32 32)
+  ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vssrlni.wu.d(<4 x i32>, <4 x i32>, i32)
+
+define <4 x i32> @lsx_vssrlni_wu_d_lo(<4 x i32> %va, <4 x i32> %vb) nounwind {
+; CHECK: llvm.loongarch.lsx.vssrlni.wu.d: argument out of range
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vssrlni.wu.d(<4 x i32> %va, <4 x i32> %vb, i32 -1)
+  ret <4 x i32> %res
+}
+
+define <4 x i32> @lsx_vssrlni_wu_d_hi(<4 x i32> %va, <4 x i32> %vb) nounwind {
+; CHECK: llvm.loongarch.lsx.vssrlni.wu.d: argument out of range
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vssrlni.wu.d(<4 x i32> %va, <4 x i32> %vb, i32 64)
+  ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vssrlni.du.q(<2 x i64>, <2 x i64>, i32)
+
+define <2 x i64> @lsx_vssrlni_du_q_lo(<2 x i64> %va, <2 x i64> %vb) nounwind {
+; CHECK: llvm.loongarch.lsx.vssrlni.du.q: argument out of range
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vssrlni.du.q(<2 x i64> %va, <2 x i64> %vb, i32 -1)
+  ret <2 x i64> %res
+}
+
+define <2 x i64> @lsx_vssrlni_du_q_hi(<2 x i64> %va, <2 x i64> %vb) nounwind {
+; CHECK: llvm.loongarch.lsx.vssrlni.du.q: argument out of range
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vssrlni.du.q(<2 x i64> %va, <2 x i64> %vb, i32 128)
+  ret <2 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlni-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlni-non-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..688be826f467f4c4422f9e74d3cc577c290b0315
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlni-non-imm.ll
@@ -0,0 +1,73 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s
+
+declare <16 x i8> @llvm.loongarch.lsx.vssrlni.b.h(<16 x i8>, <16 x i8>, i32)
+
+define <16 x i8> @lsx_vssrlni_b_h(<16 x i8> %va, <16 x i8> %vb, i32 %c) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <16 x i8> @llvm.loongarch.lsx.vssrlni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 %c)
+  ret <16 x i8> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vssrlni.h.w(<8 x i16>, <8 x i16>, i32)
+
+define <8 x i16> @lsx_vssrlni_h_w(<8 x i16> %va, <8 x i16> %vb, i32 %c) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vssrlni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 %c)
+  ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vssrlni.w.d(<4 x i32>, <4 x i32>, i32)
+
+define <4 x i32> @lsx_vssrlni_w_d(<4 x i32> %va, <4 x i32> %vb, i32 %c) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vssrlni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 %c)
+  ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vssrlni.d.q(<2 x i64>, <2 x i64>, i32)
+
+define <2 x i64> @lsx_vssrlni_d_q(<2 x i64> %va, <2 x i64> %vb, i32 %c) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vssrlni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 %c)
+  ret <2 x i64> %res
+}
+
+declare <16 x i8> @llvm.loongarch.lsx.vssrlni.bu.h(<16 x i8>, <16 x i8>, i32)
+
+define <16 x i8> @lsx_vssrlni_bu_h(<16 x i8> %va, <16 x i8> %vb, i32 %c) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <16 x i8> @llvm.loongarch.lsx.vssrlni.bu.h(<16 x i8> %va, <16 x i8> %vb, i32 %c)
+  ret <16 x i8> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vssrlni.hu.w(<8 x i16>, <8 x i16>, i32)
+
+define <8 x i16> @lsx_vssrlni_hu_w(<8 x i16> %va, <8 x i16> %vb, i32 %c) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vssrlni.hu.w(<8 x i16> %va, <8 x i16> %vb, i32 %c)
+  ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vssrlni.wu.d(<4 x i32>, <4 x i32>, i32)
+
+define <4 x i32> @lsx_vssrlni_wu_d(<4 x i32> %va, <4 x i32> %vb, i32 %c) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vssrlni.wu.d(<4 x i32> %va, <4 x i32> %vb, i32 %c)
+  ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vssrlni.du.q(<2 x i64>, <2 x i64>, i32)
+
+define <2 x i64> @lsx_vssrlni_du_q(<2 x i64> %va, <2 x i64> %vb, i32 %c) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vssrlni.du.q(<2 x i64> %va, <2 x i64> %vb, i32 %c)
+  ret <2 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlni.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlni.ll
new file mode 100644
index 0000000000000000000000000000000000000000..8dd41e7abe87391fdd08dadd8b26deb4fb6c4b0b
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlni.ll
@@ -0,0 +1,98 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+declare <16 x i8> @llvm.loongarch.lsx.vssrlni.b.h(<16 x i8>, <16 x i8>, i32)
+
+define <16 x i8> @lsx_vssrlni_b_h(<16 x i8> %va, <16 x i8> %vb) nounwind {
+; CHECK-LABEL: lsx_vssrlni_b_h:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vssrlni.b.h $vr0, $vr1, 1
+; CHECK-NEXT: ret
+entry:
+  %res = call <16 x i8> @llvm.loongarch.lsx.vssrlni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 1)
+  ret <16 x i8> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vssrlni.h.w(<8 x i16>, <8 x i16>, i32)
+
+define <8 x i16> @lsx_vssrlni_h_w(<8 x i16> %va, <8 x i16> %vb) nounwind {
+; CHECK-LABEL: lsx_vssrlni_h_w:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vssrlni.h.w $vr0, $vr1, 1
+; CHECK-NEXT: ret
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vssrlni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 1)
+  ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vssrlni.w.d(<4 x i32>, <4 x i32>, i32)
+
+define <4 x i32> @lsx_vssrlni_w_d(<4 x i32> %va, <4 x i32> %vb) nounwind {
+; CHECK-LABEL: lsx_vssrlni_w_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vssrlni.w.d $vr0, $vr1, 1
+; CHECK-NEXT: ret
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vssrlni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 1)
+  ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vssrlni.d.q(<2 x i64>, <2 x i64>, i32)
+
+define <2 x i64> @lsx_vssrlni_d_q(<2 x i64> %va, <2 x i64> %vb) nounwind {
+; CHECK-LABEL: lsx_vssrlni_d_q:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vssrlni.d.q $vr0, $vr1, 1
+; CHECK-NEXT: ret
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vssrlni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 1)
+  ret <2 x i64> %res
+}
+
+declare <16 x i8> @llvm.loongarch.lsx.vssrlni.bu.h(<16 x i8>, <16 x i8>, i32)
+
+define <16 x i8> @lsx_vssrlni_bu_h(<16 x i8> %va, <16 x i8> %vb) nounwind {
+; CHECK-LABEL: lsx_vssrlni_bu_h:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vssrlni.bu.h $vr0, $vr1, 15
+; CHECK-NEXT: ret
+entry:
+  %res = call <16 x i8> @llvm.loongarch.lsx.vssrlni.bu.h(<16 x i8> %va, <16 x i8> %vb, i32 15)
+  ret <16 x i8> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vssrlni.hu.w(<8 x i16>, <8 x i16>, i32)
+
+define <8 x i16> @lsx_vssrlni_hu_w(<8 x i16> %va, <8 x i16> %vb) nounwind {
+; CHECK-LABEL: lsx_vssrlni_hu_w:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vssrlni.hu.w $vr0, $vr1, 31
+; CHECK-NEXT: ret
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vssrlni.hu.w(<8 x i16> %va, <8 x i16> %vb, i32 31)
+  ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vssrlni.wu.d(<4 x i32>, <4 x i32>, i32)
+
+define <4 x i32> @lsx_vssrlni_wu_d(<4 x i32> %va, <4 x i32> %vb) nounwind {
+; CHECK-LABEL: lsx_vssrlni_wu_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vssrlni.wu.d $vr0, $vr1, 63
+; CHECK-NEXT: ret
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vssrlni.wu.d(<4 x i32> %va, <4 x i32> %vb, i32 63)
+  ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vssrlni.du.q(<2 x i64>, <2 x i64>, i32)
+
+define <2 x i64> @lsx_vssrlni_du_q(<2 x i64> %va, <2 x i64> %vb) nounwind {
+; CHECK-LABEL: lsx_vssrlni_du_q:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vssrlni.du.q $vr0, $vr1, 127
+; CHECK-NEXT: ret
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vssrlni.du.q(<2 x i64> %va, <2 x i64> %vb, i32 127)
+  ret <2 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrn.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrn.ll
new file mode 100644
index 0000000000000000000000000000000000000000..a8e76cbaa7fd12bdfa8cf1d12ba1749606a9f9b6
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrn.ll
@@ -0,0 +1,74 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+declare <16 x i8> @llvm.loongarch.lsx.vssrlrn.b.h(<8 x i16>, <8 x i16>)
+
+define <16 x i8> @lsx_vssrlrn_b_h(<8 x i16> %va, <8 x i16> %vb) nounwind {
+; CHECK-LABEL: lsx_vssrlrn_b_h:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vssrlrn.b.h $vr0, $vr0, $vr1
+; CHECK-NEXT: ret
+entry:
+  %res = call <16 x i8> @llvm.loongarch.lsx.vssrlrn.b.h(<8 x i16> %va, <8 x i16> %vb)
+  ret <16 x i8> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vssrlrn.h.w(<4 x i32>, <4 x i32>)
+
+define <8 x i16> @lsx_vssrlrn_h_w(<4 x i32> %va, <4 x i32> %vb) nounwind {
+; CHECK-LABEL: lsx_vssrlrn_h_w:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vssrlrn.h.w $vr0, $vr0, $vr1
+; CHECK-NEXT: ret
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vssrlrn.h.w(<4 x i32> %va, <4 x i32> %vb)
+  ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vssrlrn.w.d(<2 x i64>, <2 x i64>)
+
+define <4 x i32> @lsx_vssrlrn_w_d(<2 x i64> %va, <2 x i64> %vb) nounwind {
+; CHECK-LABEL: lsx_vssrlrn_w_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vssrlrn.w.d $vr0, $vr0, $vr1
+; CHECK-NEXT: ret
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vssrlrn.w.d(<2 x i64> %va, <2 x i64> %vb)
+  ret <4 x i32> %res
+}
+
+declare <16 x i8> @llvm.loongarch.lsx.vssrlrn.bu.h(<8 x i16>, <8 x i16>)
+
+define <16 x i8> @lsx_vssrlrn_bu_h(<8 x i16> %va, <8 x i16> %vb) nounwind {
+; CHECK-LABEL: lsx_vssrlrn_bu_h:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vssrlrn.bu.h $vr0, $vr0, $vr1
+; CHECK-NEXT: ret
+entry:
+  %res = call <16 x i8> @llvm.loongarch.lsx.vssrlrn.bu.h(<8 x i16> %va, <8 x i16> %vb)
+  ret <16 x i8> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vssrlrn.hu.w(<4 x i32>, <4 x i32>)
+
+define <8 x i16> @lsx_vssrlrn_hu_w(<4 x i32> %va, <4 x i32> %vb) nounwind {
+; CHECK-LABEL: lsx_vssrlrn_hu_w:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vssrlrn.hu.w $vr0, $vr0, $vr1
+; CHECK-NEXT: ret
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vssrlrn.hu.w(<4 x i32> %va, <4 x i32> %vb)
+  ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vssrlrn.wu.d(<2 x i64>, <2 x i64>)
+
+define <4 x i32> @lsx_vssrlrn_wu_d(<2 x i64> %va, <2 x i64> %vb) nounwind {
+; CHECK-LABEL: lsx_vssrlrn_wu_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vssrlrn.wu.d $vr0, $vr0, $vr1
+; CHECK-NEXT: ret
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vssrlrn.wu.d(<2 x i64> %va, <2 x i64> %vb)
+  ret <4 x i32> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrni-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrni-invalid-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..98a0c5b3cd28a57e1fb52a7ed54ce1fbccd87186
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrni-invalid-imm.ll
@@ -0,0 +1,129 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s
+
+declare <16 x i8> @llvm.loongarch.lsx.vssrlrni.b.h(<16 x i8>, <16 x i8>, i32)
+
+define <16 x i8> @lsx_vssrlrni_b_h_lo(<16 x i8> %va, <16 x i8> %vb) nounwind {
+; CHECK: llvm.loongarch.lsx.vssrlrni.b.h: argument out of range
+entry:
+  %res = call <16 x i8> @llvm.loongarch.lsx.vssrlrni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 -1)
+  ret <16 x i8> %res
+}
+
+define <16 x i8> @lsx_vssrlrni_b_h_hi(<16 x i8> %va, <16 x i8> %vb) nounwind {
+; CHECK: llvm.loongarch.lsx.vssrlrni.b.h: argument out of range
+entry:
+  %res = call <16 x i8> @llvm.loongarch.lsx.vssrlrni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 16)
+  ret <16 x i8> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vssrlrni.h.w(<8 x i16>, <8 x i16>, i32)
+
+define <8 x i16> @lsx_vssrlrni_h_w_lo(<8 x i16> %va, <8 x i16> %vb) nounwind {
+; CHECK: llvm.loongarch.lsx.vssrlrni.h.w: argument out of range
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vssrlrni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 -1)
+  ret <8 x i16> %res
+}
+
+define <8 x i16> @lsx_vssrlrni_h_w_hi(<8 x i16> %va, <8 x i16> %vb) nounwind {
+; CHECK: llvm.loongarch.lsx.vssrlrni.h.w: argument out of range
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vssrlrni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 32)
+  ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vssrlrni.w.d(<4 x i32>, <4 x i32>, i32)
+
+define <4 x i32> @lsx_vssrlrni_w_d_lo(<4 x i32> %va, <4 x i32> %vb) nounwind {
+; CHECK: llvm.loongarch.lsx.vssrlrni.w.d: argument out of range
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vssrlrni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 -1)
+  ret <4 x i32> %res
+}
+
+define <4 x i32> @lsx_vssrlrni_w_d_hi(<4 x i32> %va, <4 x i32> %vb) nounwind {
+; CHECK: llvm.loongarch.lsx.vssrlrni.w.d: argument out of range
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vssrlrni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 64)
+  ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vssrlrni.d.q(<2 x i64>, <2 x i64>, i32)
+
+define <2 x i64> @lsx_vssrlrni_d_q_lo(<2 x i64> %va, <2 x i64> %vb) nounwind {
+; CHECK: llvm.loongarch.lsx.vssrlrni.d.q: argument out of range
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vssrlrni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 -1)
+  ret <2 x i64> %res
+}
+
+define <2 x i64> @lsx_vssrlrni_d_q_hi(<2 x i64> %va, <2 x i64> %vb) nounwind {
+; CHECK: llvm.loongarch.lsx.vssrlrni.d.q: argument out of range
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vssrlrni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 128)
+  ret <2 x i64> %res
+}
+
+declare <16 x i8> @llvm.loongarch.lsx.vssrlrni.bu.h(<16 x i8>, <16 x i8>, i32)
+
+define <16 x i8> @lsx_vssrlrni_bu_h_lo(<16 x i8> %va, <16 x i8> %vb) nounwind {
+; CHECK: llvm.loongarch.lsx.vssrlrni.bu.h: argument out of range
+entry:
+  %res = call <16 x i8> @llvm.loongarch.lsx.vssrlrni.bu.h(<16 x i8> %va, <16 x i8> %vb, i32 -1)
+  ret <16 x i8> %res
+}
+
+define <16 x i8> @lsx_vssrlrni_bu_h_hi(<16 x i8> %va, <16 x i8> %vb) nounwind {
+; CHECK: llvm.loongarch.lsx.vssrlrni.bu.h: argument out of range
+entry:
+  %res = call <16 x i8> @llvm.loongarch.lsx.vssrlrni.bu.h(<16 x i8> %va, <16 x i8> %vb, i32 16)
+  ret <16 x i8> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vssrlrni.hu.w(<8 x i16>, <8 x i16>, i32)
+
+define <8 x i16> @lsx_vssrlrni_hu_w_lo(<8 x i16> %va, <8 x i16> %vb) nounwind {
+; CHECK: llvm.loongarch.lsx.vssrlrni.hu.w: argument out of range
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vssrlrni.hu.w(<8 x i16> %va, <8 x i16> %vb, i32 -1)
+  ret <8 x i16> %res
+}
+
+define <8 x i16> @lsx_vssrlrni_hu_w_hi(<8 x i16> %va, <8 x i16> %vb) nounwind {
+; CHECK: llvm.loongarch.lsx.vssrlrni.hu.w: argument out of range
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vssrlrni.hu.w(<8 x i16> %va, <8 x i16> %vb, i32 32)
+  ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vssrlrni.wu.d(<4 x i32>, <4 x i32>, i32)
+
+define <4 x i32> @lsx_vssrlrni_wu_d_lo(<4 x i32> %va, <4 x i32> %vb) nounwind {
+; CHECK: llvm.loongarch.lsx.vssrlrni.wu.d: argument out of range
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vssrlrni.wu.d(<4 x i32> %va, <4 x i32> %vb, i32 -1)
+  ret <4 x i32> %res
+}
+
+define <4 x i32> @lsx_vssrlrni_wu_d_hi(<4 x i32> %va, <4 x i32> %vb) nounwind {
+; CHECK: llvm.loongarch.lsx.vssrlrni.wu.d: argument out of range
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vssrlrni.wu.d(<4 x i32> %va, <4 x i32> %vb, i32 64)
+  ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vssrlrni.du.q(<2 x i64>, <2 x i64>, i32)
+
+define <2 x i64> @lsx_vssrlrni_du_q_lo(<2 x i64> %va, <2 x i64> %vb) nounwind {
+; CHECK: llvm.loongarch.lsx.vssrlrni.du.q: argument out of range
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vssrlrni.du.q(<2 x i64> %va, <2 x i64> %vb, i32 -1)
+  ret <2 x i64> %res
+}
+
+define <2 x i64> @lsx_vssrlrni_du_q_hi(<2 x i64> %va, <2 x i64> %vb) nounwind {
+; CHECK: llvm.loongarch.lsx.vssrlrni.du.q: argument out of range
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vssrlrni.du.q(<2 x i64> %va, <2 x i64> %vb, i32 128)
+  ret <2 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrni-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrni-non-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..c389b4fd6023b2cdd459bfc1e7b717a83c917e5d
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrni-non-imm.ll
@@ -0,0 +1,73 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s
+
+declare <16 x i8> @llvm.loongarch.lsx.vssrlrni.b.h(<16 x i8>, <16 x i8>, i32)
+
+define <16 x i8> @lsx_vssrlrni_b_h(<16 x i8> %va, <16 x i8> %vb, i32 %c) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <16 x i8> @llvm.loongarch.lsx.vssrlrni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 %c)
+  ret <16 x i8> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vssrlrni.h.w(<8 x i16>, <8 x i16>, i32)
+
+define <8 x i16> @lsx_vssrlrni_h_w(<8 x i16> %va, <8 x i16> %vb, i32 %c) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vssrlrni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 %c)
+  ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vssrlrni.w.d(<4 x i32>, <4 x i32>, i32)
+
+define <4 x i32> @lsx_vssrlrni_w_d(<4 x i32> %va, <4 x i32> %vb, i32 %c) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vssrlrni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 %c)
+  ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vssrlrni.d.q(<2 x i64>, <2 x i64>, i32)
+
+define <2 x i64> @lsx_vssrlrni_d_q(<2 x i64> %va, <2 x i64> %vb, i32 %c) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vssrlrni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 %c)
+  ret <2 x i64> %res
+}
+
+declare <16 x i8> @llvm.loongarch.lsx.vssrlrni.bu.h(<16 x i8>, <16 x i8>, i32)
+
+define <16 x i8> @lsx_vssrlrni_bu_h(<16 x i8> %va, <16 x i8> %vb, i32 %c) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <16 x i8> @llvm.loongarch.lsx.vssrlrni.bu.h(<16 x i8> %va, <16 x i8> %vb, i32 %c)
+  ret <16 x i8> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vssrlrni.hu.w(<8 x i16>, <8 x i16>, i32)
+
+define <8 x i16> @lsx_vssrlrni_hu_w(<8 x i16> %va, <8 x i16> %vb, i32 %c) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vssrlrni.hu.w(<8 x i16> %va, <8 x i16> %vb, i32 %c)
+  ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vssrlrni.wu.d(<4 x i32>, <4 x i32>, i32)
+
+define <4 x i32> @lsx_vssrlrni_wu_d(<4 x i32> %va, <4 x i32> %vb, i32 %c) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vssrlrni.wu.d(<4 x i32> %va, <4 x i32> %vb, i32 %c)
+  ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vssrlrni.du.q(<2 x i64>, <2 x i64>, i32)
+
+define <2 x i64> @lsx_vssrlrni_du_q(<2 x i64> %va, <2 x i64> %vb, i32 %c) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vssrlrni.du.q(<2 x i64> %va, <2 x i64> %vb, i32 %c)
+  ret <2 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrni.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrni.ll
new file mode 100644
index 0000000000000000000000000000000000000000..869e81b2b09d65550f741776bd29c6f71eef3547
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssrlrni.ll
@@ -0,0 +1,98 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+declare <16 x i8> @llvm.loongarch.lsx.vssrlrni.b.h(<16 x i8>, <16 x i8>, i32)
+
+define <16 x i8> @lsx_vssrlrni_b_h(<16 x i8> %va, <16 x i8> %vb) nounwind {
+; CHECK-LABEL: lsx_vssrlrni_b_h:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vssrlrni.b.h $vr0, $vr1, 1
+; CHECK-NEXT: ret
+entry:
+  %res = call <16 x i8> @llvm.loongarch.lsx.vssrlrni.b.h(<16 x i8> %va, <16 x i8> %vb, i32 1)
+  ret <16 x i8> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vssrlrni.h.w(<8 x i16>, <8 x i16>, i32)
+
+define <8 x i16> @lsx_vssrlrni_h_w(<8 x i16> %va, <8 x i16> %vb) nounwind {
+; CHECK-LABEL: lsx_vssrlrni_h_w:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vssrlrni.h.w $vr0, $vr1, 1
+; CHECK-NEXT: ret
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vssrlrni.h.w(<8 x i16> %va, <8 x i16> %vb, i32 1)
+  ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vssrlrni.w.d(<4 x i32>, <4 x i32>, i32)
+
+define <4 x i32> @lsx_vssrlrni_w_d(<4 x i32> %va, <4 x i32> %vb) nounwind {
+; CHECK-LABEL: lsx_vssrlrni_w_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vssrlrni.w.d $vr0, $vr1, 1
+; CHECK-NEXT: ret
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vssrlrni.w.d(<4 x i32> %va, <4 x i32> %vb, i32 1)
+  ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vssrlrni.d.q(<2 x i64>, <2 x i64>, i32)
+
+define <2 x i64> @lsx_vssrlrni_d_q(<2 x i64> %va, <2 x i64> %vb) nounwind {
+; CHECK-LABEL: lsx_vssrlrni_d_q:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vssrlrni.d.q $vr0, $vr1, 1
+; CHECK-NEXT: ret
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vssrlrni.d.q(<2 x i64> %va, <2 x i64> %vb, i32 1)
+  ret <2 x i64> %res
+}
+
+declare <16 x i8> @llvm.loongarch.lsx.vssrlrni.bu.h(<16 x i8>, <16 x i8>, i32)
+
+define <16 x i8> @lsx_vssrlrni_bu_h(<16 x i8> %va, <16 x i8> %vb) nounwind {
+; CHECK-LABEL: lsx_vssrlrni_bu_h:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vssrlrni.bu.h $vr0, $vr1, 15
+; CHECK-NEXT: ret
+entry:
+  %res = call <16 x i8> @llvm.loongarch.lsx.vssrlrni.bu.h(<16 x i8> %va, <16 x i8> %vb, i32 15)
+  ret <16 x i8> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vssrlrni.hu.w(<8 x i16>, <8 x i16>, i32)
+
+define <8 x i16> @lsx_vssrlrni_hu_w(<8 x i16> %va, <8 x i16> %vb) nounwind {
+; CHECK-LABEL: lsx_vssrlrni_hu_w:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vssrlrni.hu.w $vr0, $vr1, 31
+; CHECK-NEXT: ret
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vssrlrni.hu.w(<8 x i16> %va, <8 x i16> %vb, i32 31)
+  ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vssrlrni.wu.d(<4 x i32>, <4 x i32>, i32)
+
+define <4 x i32> @lsx_vssrlrni_wu_d(<4 x i32> %va, <4 x i32> %vb) nounwind {
+; CHECK-LABEL: lsx_vssrlrni_wu_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vssrlrni.wu.d $vr0, $vr1, 63
+; CHECK-NEXT: ret
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vssrlrni.wu.d(<4 x i32> %va, <4 x i32> %vb, i32 63)
+  ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vssrlrni.du.q(<2 x i64>, <2 x i64>, i32)
+
+define <2 x i64> @lsx_vssrlrni_du_q(<2 x i64> %va, <2 x i64> %vb) nounwind {
+; CHECK-LABEL: lsx_vssrlrni_du_q:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vssrlrni.du.q $vr0, $vr1, 127
+; CHECK-NEXT: ret
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vssrlrni.du.q(<2 x i64> %va, <2 x i64> %vb, i32 127)
+  ret <2 x i64> %res
+}
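The ssrarni/ssrlni/ssrlrni families above are all saturating shift-right-and-narrow-insert operations, and they share one immediate constraint: the shift amount must fit the width of the wider source element, so the .b.h variants accept 0-15, .h.w 0-31, .w.d 0-63, and .d.q 0-127. The *-invalid-imm.ll tests probe exactly one past each bound, while the *-non-imm.ll tests confirm that a non-constant argument trips the generic immarg verifier. The same diagnostics surface at the C level through the corresponding clang builtins; a minimal sketch, assuming the usual __builtin_lsx_* builtin naming and the v16i8 vector typedef provided by lsxintrin.h:

  #include <lsxintrin.h>

  v16i8 narrow_shift(v16i8 a, v16i8 b) {
    // In range: 1 lies within [0, 15] for the .b.h variant.
    return __builtin_lsx_vssrarni_b_h(a, b, 1);
    // A constant outside [0, 15], or a non-constant shift amount,
    // would be rejected by the same checks these tests exercise.
  }
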
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssub.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssub.ll
new file mode 100644
index 0000000000000000000000000000000000000000..c594b426d65031c62e990924df66ac86bb165dd4
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-ssub.ll
@@ -0,0 +1,98 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+declare <16 x i8> @llvm.loongarch.lsx.vssub.b(<16 x i8>, <16 x i8>)
+
+define <16 x i8> @lsx_vssub_b(<16 x i8> %va, <16 x i8> %vb) nounwind {
+; CHECK-LABEL: lsx_vssub_b:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vssub.b $vr0, $vr0, $vr1
+; CHECK-NEXT: ret
+entry:
+  %res = call <16 x i8> @llvm.loongarch.lsx.vssub.b(<16 x i8> %va, <16 x i8> %vb)
+  ret <16 x i8> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vssub.h(<8 x i16>, <8 x i16>)
+
+define <8 x i16> @lsx_vssub_h(<8 x i16> %va, <8 x i16> %vb) nounwind {
+; CHECK-LABEL: lsx_vssub_h:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vssub.h $vr0, $vr0, $vr1
+; CHECK-NEXT: ret
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vssub.h(<8 x i16> %va, <8 x i16> %vb)
+  ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vssub.w(<4 x i32>, <4 x i32>)
+
+define <4 x i32> @lsx_vssub_w(<4 x i32> %va, <4 x i32> %vb) nounwind {
+; CHECK-LABEL: lsx_vssub_w:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vssub.w $vr0, $vr0, $vr1
+; CHECK-NEXT: ret
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vssub.w(<4 x i32> %va, <4 x i32> %vb)
+  ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vssub.d(<2 x i64>, <2 x i64>)
+
+define <2 x i64> @lsx_vssub_d(<2 x i64> %va, <2 x i64> %vb) nounwind {
+; CHECK-LABEL: lsx_vssub_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vssub.d $vr0, $vr0, $vr1
+; CHECK-NEXT: ret
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vssub.d(<2 x i64> %va, <2 x i64> %vb)
+  ret <2 x i64> %res
+}
+
+declare <16 x i8> @llvm.loongarch.lsx.vssub.bu(<16 x i8>, <16 x i8>)
+
+define <16 x i8> @lsx_vssub_bu(<16 x i8> %va, <16 x i8> %vb) nounwind {
+; CHECK-LABEL: lsx_vssub_bu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vssub.bu $vr0, $vr0, $vr1
+; CHECK-NEXT: ret
+entry:
+  %res = call <16 x i8> @llvm.loongarch.lsx.vssub.bu(<16 x i8> %va, <16 x i8> %vb)
+  ret <16 x i8> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vssub.hu(<8 x i16>, <8 x i16>)
+
+define <8 x i16> @lsx_vssub_hu(<8 x i16> %va, <8 x i16> %vb) nounwind {
+; CHECK-LABEL: lsx_vssub_hu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vssub.hu $vr0, $vr0, $vr1
+; CHECK-NEXT: ret
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vssub.hu(<8 x i16> %va, <8 x i16> %vb)
+  ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vssub.wu(<4 x i32>, <4 x i32>)
+
+define <4 x i32> @lsx_vssub_wu(<4 x i32> %va, <4 x i32> %vb) nounwind {
+; CHECK-LABEL: lsx_vssub_wu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vssub.wu $vr0, $vr0, $vr1
+; CHECK-NEXT: ret
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vssub.wu(<4 x i32> %va, <4 x i32> %vb)
+  ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vssub.du(<2 x i64>, <2 x i64>)
+
+define <2 x i64> @lsx_vssub_du(<2 x i64> %va, <2 x i64> %vb) nounwind {
+; CHECK-LABEL: lsx_vssub_du:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vssub.du $vr0, $vr0, $vr1
+; CHECK-NEXT: ret
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vssub.du(<2 x i64> %va, <2 x i64> %vb)
+  ret <2 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-st-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-st-invalid-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..64518380964b4f062c0355a9037c62074086328b
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-st-invalid-imm.ll
@@ -0,0 +1,17 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s
+
+declare void @llvm.loongarch.lsx.vst(<16 x i8>, i8*, i32)
+
+define void @lsx_vst_lo(<16 x i8> %va, i8* %p) nounwind {
+; CHECK: llvm.loongarch.lsx.vst: argument out of range
+entry:
+  call void @llvm.loongarch.lsx.vst(<16 x i8> %va, i8* %p, i32 -2049)
+  ret void
+}
+
+define void @lsx_vst_hi(<16 x i8> %va, i8* %p) nounwind {
+; CHECK: llvm.loongarch.lsx.vst: argument out of range
+entry:
+  call void @llvm.loongarch.lsx.vst(<16 x i8> %va, i8* %p, i32 2048)
+  ret void
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-st-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-st-non-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..119ed9b786586d1700ceecf562f0e3f567f9f0f2
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-st-non-imm.ll
@@ -0,0 +1,10 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s
+
+declare void @llvm.loongarch.lsx.vst(<16 x i8>, i8*, i32)
+
+define void @lsx_vst(<16 x i8> %va, i8* %p, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  call void @llvm.loongarch.lsx.vst(<16 x i8> %va, i8* %p, i32 %b)
+  ret void
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-st.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-st.ll
new file mode 100644
index 0000000000000000000000000000000000000000..798f509f2318e9135f027918f3a25ae87bc196c6
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-st.ll
@@ -0,0 +1,26 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+declare void @llvm.loongarch.lsx.vst(<16 x i8>, i8*, i32)
+
+define void @lsx_vst(<16 x i8> %va, i8* %p) nounwind {
+; CHECK-LABEL: lsx_vst:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vst $vr0, $a0, -2048
+; CHECK-NEXT: ret
+entry:
+  call void @llvm.loongarch.lsx.vst(<16 x i8> %va, i8* %p, i32 -2048)
+  ret void
+}
+
+declare void @llvm.loongarch.lsx.vstx(<16 x i8>, i8*, i64)
+
+define void @lsx_vstx(<16 x i8> %va, i8* %p, i64 %c) nounwind {
+; CHECK-LABEL: lsx_vstx:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vstx $vr0, $a0, $a1
+; CHECK-NEXT: ret
+entry:
+  call void @llvm.loongarch.lsx.vstx(<16 x i8> %va, i8* %p, i64 %c)
+  ret void
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-stelm-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-stelm-invalid-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..277abcbd34ccbe2cfc0d8f90a5da24997059079a
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-stelm-invalid-imm.ll
@@ -0,0 +1,121 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s
+
+declare void @llvm.loongarch.lsx.vstelm.b(<16 x i8>, i8*, i32, i32)
+
+define void @lsx_vstelm_b_lo(<16 x i8> %va, i8* %p) nounwind {
+; CHECK: llvm.loongarch.lsx.vstelm.b: argument out of range
+entry:
+  call void @llvm.loongarch.lsx.vstelm.b(<16 x i8> %va, i8* %p, i32 -129, i32 15)
+  ret void
+}
+
+define void @lsx_vstelm_b_hi(<16 x i8> %va, i8* %p) nounwind {
+; CHECK: llvm.loongarch.lsx.vstelm.b: argument out of range
+entry:
+  call void @llvm.loongarch.lsx.vstelm.b(<16 x i8> %va, i8* %p, i32 128, i32 15)
+  ret void
+}
+
+define void @lsx_vstelm_b_idx_lo(<16 x i8> %va, i8* %p) nounwind {
+; CHECK: llvm.loongarch.lsx.vstelm.b: argument out of range
+entry:
+  call void @llvm.loongarch.lsx.vstelm.b(<16 x i8> %va, i8* %p, i32 1, i32 -1)
+  ret void
+}
+
+define void @lsx_vstelm_b_idx_hi(<16 x i8> %va, i8* %p) nounwind {
+; CHECK: llvm.loongarch.lsx.vstelm.b: argument out of range
+entry:
+  call void @llvm.loongarch.lsx.vstelm.b(<16 x i8> %va, i8* %p, i32 1, i32 16)
+  ret void
+}
+
+declare void @llvm.loongarch.lsx.vstelm.h(<8 x i16>, i8*, i32, i32)
+
+define void @lsx_vstelm_h_lo(<8 x i16> %va, i8* %p) nounwind {
+; CHECK: llvm.loongarch.lsx.vstelm.h: argument out of range or not a multiple of 2.
+entry:
+  call void @llvm.loongarch.lsx.vstelm.h(<8 x i16> %va, i8* %p, i32 -258, i32 7)
+  ret void
+}
+
+define void @lsx_vstelm_h_hi(<8 x i16> %va, i8* %p) nounwind {
+; CHECK: llvm.loongarch.lsx.vstelm.h: argument out of range or not a multiple of 2.
+entry:
+  call void @llvm.loongarch.lsx.vstelm.h(<8 x i16> %va, i8* %p, i32 256, i32 7)
+  ret void
+}
+
+define void @lsx_vstelm_h_idx_lo(<8 x i16> %va, i8* %p) nounwind {
+; CHECK: llvm.loongarch.lsx.vstelm.h: argument out of range or not a multiple of 2.
+entry:
+  call void @llvm.loongarch.lsx.vstelm.h(<8 x i16> %va, i8* %p, i32 2, i32 -1)
+  ret void
+}
+
+define void @lsx_vstelm_h_idx_hi(<8 x i16> %va, i8* %p) nounwind {
+; CHECK: llvm.loongarch.lsx.vstelm.h: argument out of range or not a multiple of 2.
+entry:
+  call void @llvm.loongarch.lsx.vstelm.h(<8 x i16> %va, i8* %p, i32 2, i32 8)
+  ret void
+}
+
+declare void @llvm.loongarch.lsx.vstelm.w(<4 x i32>, i8*, i32, i32)
+
+define void @lsx_vstelm_w_lo(<4 x i32> %va, i8* %p) nounwind {
+; CHECK: llvm.loongarch.lsx.vstelm.w: argument out of range or not a multiple of 4.
+entry:
+  call void @llvm.loongarch.lsx.vstelm.w(<4 x i32> %va, i8* %p, i32 -516, i32 3)
+  ret void
+}
+
+define void @lsx_vstelm_w_hi(<4 x i32> %va, i8* %p) nounwind {
+; CHECK: llvm.loongarch.lsx.vstelm.w: argument out of range or not a multiple of 4.
+entry:
+  call void @llvm.loongarch.lsx.vstelm.w(<4 x i32> %va, i8* %p, i32 512, i32 3)
+  ret void
+}
+
+define void @lsx_vstelm_w_idx_lo(<4 x i32> %va, i8* %p) nounwind {
+; CHECK: llvm.loongarch.lsx.vstelm.w: argument out of range or not a multiple of 4.
+entry:
+  call void @llvm.loongarch.lsx.vstelm.w(<4 x i32> %va, i8* %p, i32 4, i32 -1)
+  ret void
+}
+
+define void @lsx_vstelm_w_idx_hi(<4 x i32> %va, i8* %p) nounwind {
+; CHECK: llvm.loongarch.lsx.vstelm.w: argument out of range or not a multiple of 4.
+entry:
+  call void @llvm.loongarch.lsx.vstelm.w(<4 x i32> %va, i8* %p, i32 4, i32 4)
+  ret void
+}
+
+declare void @llvm.loongarch.lsx.vstelm.d(<2 x i64>, i8*, i32, i32)
+
+define void @lsx_vstelm_d_lo(<2 x i64> %va, i8* %p) nounwind {
+; CHECK: llvm.loongarch.lsx.vstelm.d: argument out of range or not a multiple of 8.
+entry:
+  call void @llvm.loongarch.lsx.vstelm.d(<2 x i64> %va, i8* %p, i32 -1032, i32 1)
+  ret void
+}
+
+define void @lsx_vstelm_d_hi(<2 x i64> %va, i8* %p) nounwind {
+; CHECK: llvm.loongarch.lsx.vstelm.d: argument out of range or not a multiple of 8.
+entry:
+  call void @llvm.loongarch.lsx.vstelm.d(<2 x i64> %va, i8* %p, i32 1024, i32 1)
+  ret void
+}
+
+define void @lsx_vstelm_d_idx_lo(<2 x i64> %va, i8* %p) nounwind {
+; CHECK: llvm.loongarch.lsx.vstelm.d: argument out of range or not a multiple of 8.
+entry:
+  call void @llvm.loongarch.lsx.vstelm.d(<2 x i64> %va, i8* %p, i32 8, i32 -1)
+  ret void
+}
+
+define void @lsx_vstelm_d_idx_hi(<2 x i64> %va, i8* %p) nounwind {
+; CHECK: llvm.loongarch.lsx.vstelm.d: argument out of range or not a multiple of 8.
+entry:
+  call void @llvm.loongarch.lsx.vstelm.d(<2 x i64> %va, i8* %p, i32 8, i32 2)
+  ret void
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-stelm-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-stelm-non-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..f53932f7903551d4c5e322ba44498278fff902f7
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-stelm-non-imm.ll
@@ -0,0 +1,65 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s
+
+declare void @llvm.loongarch.lsx.vstelm.b(<16 x i8>, i8*, i32, i32)
+
+define void @lsx_vstelm_b(<16 x i8> %va, i8* %p, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  call void @llvm.loongarch.lsx.vstelm.b(<16 x i8> %va, i8* %p, i32 %b, i32 1)
+  ret void
+}
+
+define void @lsx_vstelm_b_idx(<16 x i8> %va, i8* %p, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  call void @llvm.loongarch.lsx.vstelm.b(<16 x i8> %va, i8* %p, i32 1, i32 %b)
+  ret void
+}
+
+declare void @llvm.loongarch.lsx.vstelm.h(<8 x i16>, i8*, i32, i32)
+
+define void @lsx_vstelm_h(<8 x i16> %va, i8* %p, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  call void @llvm.loongarch.lsx.vstelm.h(<8 x i16> %va, i8* %p, i32 %b, i32 1)
+  ret void
+}
+
+define void @lsx_vstelm_h_idx(<8 x i16> %va, i8* %p, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  call void @llvm.loongarch.lsx.vstelm.h(<8 x i16> %va, i8* %p, i32 2, i32 %b)
+  ret void
+}
+
+declare void @llvm.loongarch.lsx.vstelm.w(<4 x i32>, i8*, i32, i32)
+
+define void @lsx_vstelm_w(<4 x i32> %va, i8* %p, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  call void @llvm.loongarch.lsx.vstelm.w(<4 x i32> %va, i8* %p, i32 %b, i32 1)
+  ret void
+}
+
+define void @lsx_vstelm_w_idx(<4 x i32> %va, i8* %p, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  call void @llvm.loongarch.lsx.vstelm.w(<4 x i32> %va, i8* %p, i32 4, i32 %b)
+  ret void
+}
+
+declare void @llvm.loongarch.lsx.vstelm.d(<2 x i64>, i8*, i32, i32)
+
+define void @lsx_vstelm_d(<2 x i64> %va, i8* %p, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  call void @llvm.loongarch.lsx.vstelm.d(<2 x i64> %va, i8* %p, i32 %b, i32 1)
+  ret void
+}
+
+define void @lsx_vstelm_d_idx(<2 x i64> %va, i8* %p, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  call void @llvm.loongarch.lsx.vstelm.d(<2 x i64> %va, i8* %p, i32 8, i32 %b)
+  ret void
+}
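The vstelm tests above exercise two independent immediates: a byte offset, which is a signed 8-bit field scaled by the element size (so it must also be a multiple of 2, 4, or 8 for the .h, .w, and .d forms, giving e.g. [-128, 127] for .b and [-1024, 1016] for .d), and a lane index, which must be below the element count. A hedged C sketch of valid uses, again assuming the __builtin_lsx_* spellings and vector typedefs from lsxintrin.h:

  #include <lsxintrin.h>

  void store_lanes(v16i8 a, v8i16 b, void *p) {
    // Byte element 15 stored at offset 1: offset in [-128, 127], index in [0, 15].
    __builtin_lsx_vstelm_b(a, p, 1, 15);
    // Halfword element 7 stored at offset 2: offset must be a multiple of 2.
    __builtin_lsx_vstelm_h(b, p, 2, 7);
  }
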
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-stelm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-stelm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..6b9e7a9d7462e2ba85cd20748602a1fe2d98586e
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-stelm.ll
@@ -0,0 +1,50 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+declare void @llvm.loongarch.lsx.vstelm.b(<16 x i8>, i8*, i32, i32)
+
+define void @lsx_vstelm_b(<16 x i8> %va, i8* %p) nounwind {
+; CHECK-LABEL: lsx_vstelm_b:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vstelm.b $vr0, $a0, 1, 15
+; CHECK-NEXT: ret
+entry:
+  call void @llvm.loongarch.lsx.vstelm.b(<16 x i8> %va, i8* %p, i32 1, i32 15)
+  ret void
+}
+
+declare void @llvm.loongarch.lsx.vstelm.h(<8 x i16>, i8*, i32, i32)
+
+define void @lsx_vstelm_h(<8 x i16> %va, i8* %p) nounwind {
+; CHECK-LABEL: lsx_vstelm_h:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vstelm.h $vr0, $a0, 2, 7
+; CHECK-NEXT: ret
+entry:
+  call void @llvm.loongarch.lsx.vstelm.h(<8 x i16> %va, i8* %p, i32 2, i32 7)
+  ret void
+}
+
+declare void @llvm.loongarch.lsx.vstelm.w(<4 x i32>, i8*, i32, i32)
+
+define void @lsx_vstelm_w(<4 x i32> %va, i8* %p) nounwind {
+; CHECK-LABEL: lsx_vstelm_w:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vstelm.w $vr0, $a0, 4, 3
+; CHECK-NEXT: ret
+entry:
+  call void @llvm.loongarch.lsx.vstelm.w(<4 x i32> %va, i8* %p, i32 4, i32 3)
+  ret void
+}
+
+declare void @llvm.loongarch.lsx.vstelm.d(<2 x i64>, i8*, i32, i32)
+
+define void @lsx_vstelm_d(<2 x i64> %va, i8* %p) nounwind {
+; CHECK-LABEL: lsx_vstelm_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vstelm.d $vr0, $a0, 8, 1
+; CHECK-NEXT: ret
+entry:
+  call void @llvm.loongarch.lsx.vstelm.d(<2 x i64> %va, i8* %p, i32 8, i32 1)
+  ret void
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sub.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sub.ll
new file mode 100644
index 0000000000000000000000000000000000000000..5c04a3d8de0df6071cc5c277794d2fc5d895546b
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-sub.ll
@@ -0,0 +1,62 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+declare <16 x i8> @llvm.loongarch.lsx.vsub.b(<16 x i8>, <16 x i8>)
+
+define <16 x i8> @lsx_vsub_b(<16 x i8> %va, <16 x i8> %vb) nounwind {
+; CHECK-LABEL: lsx_vsub_b:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsub.b $vr0, $vr0, $vr1
+; CHECK-NEXT: ret
+entry:
+  %res = call <16 x i8> @llvm.loongarch.lsx.vsub.b(<16 x i8> %va, <16 x i8> %vb)
+  ret <16 x i8> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vsub.h(<8 x i16>, <8 x i16>)
+
+define <8 x i16> @lsx_vsub_h(<8 x i16> %va, <8 x i16> %vb) nounwind {
+; CHECK-LABEL: lsx_vsub_h:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsub.h $vr0, $vr0, $vr1
+; CHECK-NEXT: ret
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vsub.h(<8 x i16> %va, <8 x i16> %vb)
+  ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vsub.w(<4 x i32>, <4 x i32>)
+
+define <4 x i32> @lsx_vsub_w(<4 x i32> %va, <4 x i32> %vb) nounwind {
+; CHECK-LABEL: lsx_vsub_w:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsub.w $vr0, $vr0, $vr1
+; CHECK-NEXT: ret
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vsub.w(<4 x i32> %va, <4 x i32> %vb)
+  ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vsub.d(<2 x i64>, <2 x i64>)
+
+define <2 x i64> @lsx_vsub_d(<2 x i64> %va, <2 x i64> %vb) nounwind {
+; CHECK-LABEL: lsx_vsub_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsub.d $vr0, $vr0, $vr1
+; CHECK-NEXT: ret
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vsub.d(<2 x i64> %va, <2 x i64> %vb)
+  ret <2 x i64> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vsub.q(<2 x i64>, <2 x i64>)
+
+define <2 x i64> @lsx_vsub_q(<2 x i64> %va, <2 x i64> %vb) nounwind {
+; CHECK-LABEL: lsx_vsub_q:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsub.q $vr0, $vr0, $vr1
+; CHECK-NEXT: ret
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vsub.q(<2 x i64> %va, <2 x i64> %vb)
  ret <2 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-subi-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-subi-invalid-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..96cc1241fbf3f42edd804976acda04cb86f94ca6
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-subi-invalid-imm.ll
@@ -0,0 +1,65 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s
+
+declare <16 x i8> @llvm.loongarch.lsx.vsubi.bu(<16 x i8>, i32)
+
+define <16 x i8> @lsx_vsubi_bu_lo(<16 x i8> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vsubi.bu: argument out of range
+entry:
+  %res = call <16 x i8> @llvm.loongarch.lsx.vsubi.bu(<16 x i8> %va, i32 -1)
+  ret <16 x i8> %res
+}
+
+define <16 x i8> @lsx_vsubi_bu_hi(<16 x i8> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vsubi.bu: argument out of range
+entry:
+  %res = call <16 x i8> @llvm.loongarch.lsx.vsubi.bu(<16 x i8> %va, i32 32)
+  ret <16 x i8> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vsubi.hu(<8 x i16>, i32)
+
+define <8 x i16> @lsx_vsubi_hu_lo(<8 x i16> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vsubi.hu: argument out of range
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vsubi.hu(<8 x i16> %va, i32 -1)
+  ret <8 x i16> %res
+}
+
+define <8 x i16> @lsx_vsubi_hu_hi(<8 x i16> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vsubi.hu: argument out of range
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vsubi.hu(<8 x i16> %va, i32 32)
+  ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vsubi.wu(<4 x i32>, i32)
+
+define <4 x i32> @lsx_vsubi_wu_lo(<4 x i32> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vsubi.wu: argument out of range
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vsubi.wu(<4 x i32> %va, i32 -1)
+  ret <4 x i32> %res
+}
+
+define <4 x i32> @lsx_vsubi_wu_hi(<4 x i32> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vsubi.wu: argument out of range
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vsubi.wu(<4 x i32> %va, i32 32)
+  ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vsubi.du(<2 x i64>, i32)
+
+define <2 x i64> @lsx_vsubi_du_lo(<2 x i64> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vsubi.du: argument out of range
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vsubi.du(<2 x i64> %va, i32 -1)
+  ret <2 x i64> %res
+}
+
+define <2 x i64> @lsx_vsubi_du_hi(<2 x i64> %va) nounwind {
+; CHECK: llvm.loongarch.lsx.vsubi.du: argument out of range
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vsubi.du(<2 x i64> %va, i32 32)
+  ret <2 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-subi-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-subi-non-imm.ll
new file mode 100644
index 0000000000000000000000000000000000000000..162f9ad131c75cee7bb8b44ac68e67cc566227e8
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-subi-non-imm.ll
@@ -0,0 +1,37 @@
+; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s
+
+declare <16 x i8> @llvm.loongarch.lsx.vsubi.bu(<16 x i8>, i32)
+
+define <16 x i8> @lsx_vsubi_bu(<16 x i8> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <16 x i8> @llvm.loongarch.lsx.vsubi.bu(<16 x i8> %va, i32 %b)
+  ret <16 x i8> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vsubi.hu(<8 x i16>, i32)
+
+define <8 x i16> @lsx_vsubi_hu(<8 x i16> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vsubi.hu(<8 x i16> %va, i32 %b)
+  ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vsubi.wu(<4 x i32>, i32)
+
+define <4 x i32> @lsx_vsubi_wu(<4 x i32> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vsubi.wu(<4 x i32> %va, i32 %b)
+  ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vsubi.du(<2 x i64>, i32)
+
+define <2 x i64> @lsx_vsubi_du(<2 x i64> %va, i32 %b) nounwind {
+; CHECK: immarg operand has non-immediate parameter
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vsubi.du(<2 x i64> %va, i32 %b)
+  ret <2 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-subi.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-subi.ll
new file mode 100644
index 0000000000000000000000000000000000000000..304a4e4a78cc7c3529906a96cd838ec4a7536500
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-subi.ll
@@ -0,0 +1,50 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+declare <16 x i8> @llvm.loongarch.lsx.vsubi.bu(<16 x i8>, i32)
+
+define <16 x i8> @lsx_vsubi_bu(<16 x i8> %va) nounwind {
+; CHECK-LABEL: lsx_vsubi_bu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsubi.bu $vr0, $vr0, 31
+; CHECK-NEXT: ret
+entry:
+  %res = call <16 x i8> @llvm.loongarch.lsx.vsubi.bu(<16 x i8> %va, i32 31)
+  ret <16 x i8> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vsubi.hu(<8 x i16>, i32)
+
+define <8 x i16> @lsx_vsubi_hu(<8 x i16> %va) nounwind {
+; CHECK-LABEL: lsx_vsubi_hu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsubi.hu $vr0, $vr0, 31
+; CHECK-NEXT: ret
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vsubi.hu(<8 x i16> %va, i32 31)
+  ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vsubi.wu(<4 x i32>, i32)
+
+define <4 x i32> @lsx_vsubi_wu(<4 x i32> %va) nounwind {
+; CHECK-LABEL: lsx_vsubi_wu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsubi.wu $vr0, $vr0, 31
+; CHECK-NEXT: ret
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vsubi.wu(<4 x i32> %va, i32 31)
+  ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vsubi.du(<2 x i64>, i32)
+
+define <2 x i64> @lsx_vsubi_du(<2 x i64> %va) nounwind {
+; CHECK-LABEL: lsx_vsubi_du:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsubi.du $vr0, $vr0, 31
+; CHECK-NEXT: ret
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vsubi.du(<2 x i64> %va, i32 31)
+  ret <2 x i64> %res
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-subw.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-subw.ll
new file mode 100644
index 0000000000000000000000000000000000000000..48100db743344ef9a02644f22aef4a547cb49272
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-subw.ll
@@ -0,0 +1,194 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+declare <8 x i16> @llvm.loongarch.lsx.vsubwev.h.b(<16 x i8>, <16 x i8>)
+
+define <8 x i16> @lsx_vsubwev_h_b(<16 x i8> %va, <16 x i8> %vb) nounwind {
+; CHECK-LABEL: lsx_vsubwev_h_b:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsubwev.h.b $vr0, $vr0, $vr1
+; CHECK-NEXT: ret
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vsubwev.h.b(<16 x i8> %va, <16 x i8> %vb)
+  ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vsubwev.w.h(<8 x i16>, <8 x i16>)
+
+define <4 x i32> @lsx_vsubwev_w_h(<8 x i16> %va, <8 x i16> %vb) nounwind {
+; CHECK-LABEL: lsx_vsubwev_w_h:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsubwev.w.h $vr0, $vr0, $vr1
+; CHECK-NEXT: ret
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vsubwev.w.h(<8 x i16> %va, <8 x i16> %vb)
+  ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vsubwev.d.w(<4 x i32>, <4 x i32>)
+
+define <2 x i64> @lsx_vsubwev_d_w(<4 x i32> %va, <4 x i32> %vb) nounwind {
+; CHECK-LABEL: lsx_vsubwev_d_w:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsubwev.d.w $vr0, $vr0, $vr1
+; CHECK-NEXT: ret
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vsubwev.d.w(<4 x i32> %va, <4 x i32> %vb)
+  ret <2 x i64> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vsubwev.q.d(<2 x i64>, <2 x i64>)
+
+define <2 x i64> @lsx_vsubwev_q_d(<2 x i64> %va, <2 x i64> %vb) nounwind {
+; CHECK-LABEL: lsx_vsubwev_q_d:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsubwev.q.d $vr0, $vr0, $vr1
+; CHECK-NEXT: ret
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vsubwev.q.d(<2 x i64> %va, <2 x i64> %vb)
+  ret <2 x i64> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vsubwev.h.bu(<16 x i8>, <16 x i8>)
+
+define <8 x i16> @lsx_vsubwev_h_bu(<16 x i8> %va, <16 x i8> %vb) nounwind {
+; CHECK-LABEL: lsx_vsubwev_h_bu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsubwev.h.bu $vr0, $vr0, $vr1
+; CHECK-NEXT: ret
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vsubwev.h.bu(<16 x i8> %va, <16 x i8> %vb)
+  ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vsubwev.w.hu(<8 x i16>, <8 x i16>)
+
+define <4 x i32> @lsx_vsubwev_w_hu(<8 x i16> %va, <8 x i16> %vb) nounwind {
+; CHECK-LABEL: lsx_vsubwev_w_hu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsubwev.w.hu $vr0, $vr0, $vr1
+; CHECK-NEXT: ret
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vsubwev.w.hu(<8 x i16> %va, <8 x i16> %vb)
+  ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vsubwev.d.wu(<4 x i32>, <4 x i32>)
+
+define <2 x i64> @lsx_vsubwev_d_wu(<4 x i32> %va, <4 x i32> %vb) nounwind {
+; CHECK-LABEL: lsx_vsubwev_d_wu:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsubwev.d.wu $vr0, $vr0, $vr1
+; CHECK-NEXT: ret
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vsubwev.d.wu(<4 x i32> %va, <4 x i32> %vb)
+  ret <2 x i64> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vsubwev.q.du(<2 x i64>, <2 x i64>)
+
+define <2 x i64> @lsx_vsubwev_q_du(<2 x i64> %va, <2 x i64> %vb) nounwind {
+; CHECK-LABEL: lsx_vsubwev_q_du:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsubwev.q.du $vr0, $vr0, $vr1
+; CHECK-NEXT: ret
+entry:
+  %res = call <2 x i64> @llvm.loongarch.lsx.vsubwev.q.du(<2 x i64> %va, <2 x i64> %vb)
+  ret <2 x i64> %res
+}
+
+declare <8 x i16> @llvm.loongarch.lsx.vsubwod.h.b(<16 x i8>, <16 x i8>)
+
+define <8 x i16> @lsx_vsubwod_h_b(<16 x i8> %va, <16 x i8> %vb) nounwind {
+; CHECK-LABEL: lsx_vsubwod_h_b:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsubwod.h.b $vr0, $vr0, $vr1
+; CHECK-NEXT: ret
+entry:
+  %res = call <8 x i16> @llvm.loongarch.lsx.vsubwod.h.b(<16 x i8> %va, <16 x i8> %vb)
+  ret <8 x i16> %res
+}
+
+declare <4 x i32> @llvm.loongarch.lsx.vsubwod.w.h(<8 x i16>, <8 x i16>)
+
+define <4 x i32> @lsx_vsubwod_w_h(<8 x i16> %va, <8 x i16> %vb) nounwind {
+; CHECK-LABEL: lsx_vsubwod_w_h:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsubwod.w.h $vr0, $vr0, $vr1
+; CHECK-NEXT: ret
+entry:
+  %res = call <4 x i32> @llvm.loongarch.lsx.vsubwod.w.h(<8 x i16> %va, <8 x i16> %vb)
+  ret <4 x i32> %res
+}
+
+declare <2 x i64> @llvm.loongarch.lsx.vsubwod.d.w(<4 x i32>, <4 x i32>)
+
+define <2 x i64> @lsx_vsubwod_d_w(<4 x i32> %va, <4 x i32> %vb) nounwind {
+; CHECK-LABEL: lsx_vsubwod_d_w:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsubwod.d.w
$vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsubwod.d.w(<4 x i32> %va, <4 x i32> %vb) + ret <2 x i64> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vsubwod.q.d(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vsubwod_q_d(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vsubwod_q_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsubwod.q.d $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsubwod.q.d(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} + +declare <8 x i16> @llvm.loongarch.lsx.vsubwod.h.bu(<16 x i8>, <16 x i8>) + +define <8 x i16> @lsx_vsubwod_h_bu(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vsubwod_h_bu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsubwod.h.bu $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <8 x i16> @llvm.loongarch.lsx.vsubwod.h.bu(<16 x i8> %va, <16 x i8> %vb) + ret <8 x i16> %res +} + +declare <4 x i32> @llvm.loongarch.lsx.vsubwod.w.hu(<8 x i16>, <8 x i16>) + +define <4 x i32> @lsx_vsubwod_w_hu(<8 x i16> %va, <8 x i16> %vb) nounwind { +; CHECK-LABEL: lsx_vsubwod_w_hu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsubwod.w.hu $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <4 x i32> @llvm.loongarch.lsx.vsubwod.w.hu(<8 x i16> %va, <8 x i16> %vb) + ret <4 x i32> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vsubwod.d.wu(<4 x i32>, <4 x i32>) + +define <2 x i64> @lsx_vsubwod_d_wu(<4 x i32> %va, <4 x i32> %vb) nounwind { +; CHECK-LABEL: lsx_vsubwod_d_wu: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsubwod.d.wu $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsubwod.d.wu(<4 x i32> %va, <4 x i32> %vb) + ret <2 x i64> %res +} + +declare <2 x i64> @llvm.loongarch.lsx.vsubwod.q.du(<2 x i64>, <2 x i64>) + +define <2 x i64> @lsx_vsubwod_q_du(<2 x i64> %va, <2 x i64> %vb) nounwind { +; CHECK-LABEL: lsx_vsubwod_q_du: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsubwod.q.du $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <2 x i64> @llvm.loongarch.lsx.vsubwod.q.du(<2 x i64> %va, <2 x i64> %vb) + ret <2 x i64> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-xor.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-xor.ll new file mode 100644 index 0000000000000000000000000000000000000000..72a1fe93c2c0135f1e128d7fd5c649353c6a6055 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-xor.ll @@ -0,0 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vxor.v(<16 x i8>, <16 x i8>) + +define <16 x i8> @lsx_vxor_v(<16 x i8> %va, <16 x i8> %vb) nounwind { +; CHECK-LABEL: lsx_vxor_v: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vxor.v(<16 x i8> %va, <16 x i8> %vb) + ret <16 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-xori-invalid-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-xori-invalid-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..5f5613189ac81389c27496f879bf9100471e885d --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-xori-invalid-imm.ll @@ -0,0 +1,17 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vxori.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vxori_b_lo(<16 x i8> %va) nounwind { +; CHECK: 
llvm.loongarch.lsx.vxori.b: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vxori.b(<16 x i8> %va, i32 -1) + ret <16 x i8> %res +} + +define <16 x i8> @lsx_vxori_b_hi(<16 x i8> %va) nounwind { +; CHECK: llvm.loongarch.lsx.vxori.b: argument out of range +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vxori.b(<16 x i8> %va, i32 256) + ret <16 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-xori-non-imm.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-xori-non-imm.ll new file mode 100644 index 0000000000000000000000000000000000000000..4238d89120f1a6650a456d982771e7e002c5177e --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-xori-non-imm.ll @@ -0,0 +1,10 @@ +; RUN: not llc --mtriple=loongarch64 --mattr=+lsx < %s 2>&1 | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vxori.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vxori_b(<16 x i8> %va, i32 %b) nounwind { +; CHECK: immarg operand has non-immediate parameter +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vxori.b(<16 x i8> %va, i32 %b) + ret <16 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-xori.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-xori.ll new file mode 100644 index 0000000000000000000000000000000000000000..09669cd5ac14c733fbc887d01537881b068fa1a4 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-xori.ll @@ -0,0 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +declare <16 x i8> @llvm.loongarch.lsx.vxori.b(<16 x i8>, i32) + +define <16 x i8> @lsx_vxori_b(<16 x i8> %va) nounwind { +; CHECK-LABEL: lsx_vxori_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vxori.b $vr0, $vr0, 3 +; CHECK-NEXT: ret +entry: + %res = call <16 x i8> @llvm.loongarch.lsx.vxori.b(<16 x i8> %va, i32 3) + ret <16 x i8> %res +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/add.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/add.ll new file mode 100644 index 0000000000000000000000000000000000000000..2a7c37c2ae346efa58e08f2a2473559d7e2133fd --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/add.ll @@ -0,0 +1,122 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +define void @add_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: add_v16i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vadd.b $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <16 x i8>, ptr %a0 + %v1 = load <16 x i8>, ptr %a1 + %v2 = add <16 x i8> %v0, %v1 + store <16 x i8> %v2, ptr %res + ret void +} + +define void @add_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: add_v8i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vadd.h $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <8 x i16>, ptr %a0 + %v1 = load <8 x i16>, ptr %a1 + %v2 = add <8 x i16> %v0, %v1 + store <8 x i16> %v2, ptr %res + ret void +} + +define void @add_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: add_v4i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vadd.w $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <4 x i32>, ptr %a0 + %v1 = 
load <4 x i32>, ptr %a1
+ %v2 = add <4 x i32> %v0, %v1
+ store <4 x i32> %v2, ptr %res
+ ret void
+}
+
+define void @add_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: add_v2i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a2, 0
+; CHECK-NEXT: vld $vr1, $a1, 0
+; CHECK-NEXT: vadd.d $vr0, $vr1, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <2 x i64>, ptr %a0
+ %v1 = load <2 x i64>, ptr %a1
+ %v2 = add <2 x i64> %v0, %v1
+ store <2 x i64> %v2, ptr %res
+ ret void
+}
+
+define void @add_v16i8_31(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: add_v16i8_31:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vaddi.bu $vr0, $vr0, 31
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <16 x i8>, ptr %a0
+ %v1 = add <16 x i8> %v0, <i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31>
+ store <16 x i8> %v1, ptr %res
+ ret void
+}
+
+define void @add_v8i16_31(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: add_v8i16_31:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vaddi.hu $vr0, $vr0, 31
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <8 x i16>, ptr %a0
+ %v1 = add <8 x i16> %v0, <i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31>
+ store <8 x i16> %v1, ptr %res
+ ret void
+}
+
+define void @add_v4i32_31(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: add_v4i32_31:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vaddi.wu $vr0, $vr0, 31
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <4 x i32>, ptr %a0
+ %v1 = add <4 x i32> %v0, <i32 31, i32 31, i32 31, i32 31>
+ store <4 x i32> %v1, ptr %res
+ ret void
+}
+
+define void @add_v2i64_31(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: add_v2i64_31:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vaddi.du $vr0, $vr0, 31
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <2 x i64>, ptr %a0
+ %v1 = add <2 x i64> %v0, <i64 31, i64 31>
+ store <2 x i64> %v1, ptr %res
+ ret void
+}
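The `_31` cases above sit at the top of the unsigned 5-bit immediate range of `vaddi.{b,h,w,d}u`, which is why 31 is the chosen splat. As a hedged sketch (not part of the patch; the function name is illustrative): with a splat of 32 the immediate no longer fits, so one would expect selection to materialize the constant (e.g. via `vrepli.b`) and fall back to the register form `vadd.b`.

```llvm
; Sketch under the assumption above: the ui5 immediate of vaddi.bu caps at 31,
; so this splat of 32 should not fold into the immediate form.
define void @add_v16i8_32(ptr %res, ptr %a0) nounwind {
entry:
  %v0 = load <16 x i8>, ptr %a0
  %v1 = add <16 x i8> %v0, <i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32, i8 32>
  store <16 x i8> %v1, ptr %res
  ret void
}
```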
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/and.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/and.ll
new file mode 100644
index 0000000000000000000000000000000000000000..523255159a811525440b470a2b04120b96a8e3f2
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/and.ll
@@ -0,0 +1,125 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+define void @and_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: and_v16i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a2, 0
+; CHECK-NEXT: vld $vr1, $a1, 0
+; CHECK-NEXT: vand.v $vr0, $vr1, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <16 x i8>, ptr %a0
+ %v1 = load <16 x i8>, ptr %a1
+ %v2 = and <16 x i8> %v0, %v1
+ store <16 x i8> %v2, ptr %res
+ ret void
+}
+
+define void @and_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: and_v8i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a2, 0
+; CHECK-NEXT: vld $vr1, $a1, 0
+; CHECK-NEXT: vand.v $vr0, $vr1, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <8 x i16>, ptr %a0
+ %v1 = load <8 x i16>, ptr %a1
+ %v2 = and <8 x i16> %v0, %v1
+ store <8 x i16> %v2, ptr %res
+ ret void
+}
+
+define void @and_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: and_v4i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a2, 0
+; CHECK-NEXT: vld $vr1, $a1, 0
+; CHECK-NEXT: vand.v $vr0, $vr1, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <4 x i32>, ptr %a0
+ %v1 = load <4 x i32>, ptr %a1
+ %v2 = and <4 x i32> %v0, %v1
+ store <4 x i32> %v2, ptr %res
+ ret void
+}
+
+define void @and_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: and_v2i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a2, 0
+; CHECK-NEXT: vld $vr1, $a1, 0
+; CHECK-NEXT: vand.v $vr0, $vr1, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <2 x i64>, ptr %a0
+ %v1 = load <2 x i64>, ptr %a1
+ %v2 = and <2 x i64> %v0, %v1
+ store <2 x i64> %v2, ptr %res
+ ret void
+}
+
+define void @and_u_v16i8(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: and_u_v16i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vandi.b $vr0, $vr0, 31
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <16 x i8>, ptr %a0
+ %v1 = and <16 x i8> %v0, <i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31>
+ store <16 x i8> %v1, ptr %res
+ ret void
+}
+
+define void @and_u_v8i16(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: and_u_v8i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vrepli.h $vr1, 31
+; CHECK-NEXT: vand.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <8 x i16>, ptr %a0
+ %v1 = and <8 x i16> %v0, <i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31>
+ store <8 x i16> %v1, ptr %res
+ ret void
+}
+
+define void @and_u_v4i32(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: and_u_v4i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vrepli.w $vr1, 31
+; CHECK-NEXT: vand.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <4 x i32>, ptr %a0
+ %v1 = and <4 x i32> %v0, <i32 31, i32 31, i32 31, i32 31>
+ store <4 x i32> %v1, ptr %res
+ ret void
+}
+
+define void @and_u_v2i64(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: and_u_v2i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vrepli.d $vr1, 31
+; CHECK-NEXT: vand.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <2 x i64>, ptr %a0
+ %v1 = and <2 x i64> %v0, <i64 31, i64 31>
+ store <2 x i64> %v1, ptr %res
+ ret void
+}
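The `and_u_*` cases expose an asymmetry: only `vandi.b` exists in immediate form, so the byte case folds the mask directly while the wider element types splat it with `vrepli.{h,w,d}` and then use the register form `vand.v`. Since `vand.v` is a plain 128-bit bitwise AND, a wider mask whose bytes are uniform (e.g. `i16 0x1f1f`) is bit-identical to a byte splat of 31; whether selection exploits that is not covered by these tests. A hedged sketch of such a mask (illustrative only, not part of the patch):

```llvm
; 0x1f1f per i16 lane is 0x1f per byte, so vandi.b would be a legal lowering
; here; these tests do not show whether the selector actually catches it.
define void @and_u_v8i16_bytemask(ptr %res, ptr %a0) nounwind {
entry:
  %v0 = load <8 x i16>, ptr %a0
  %v1 = and <8 x i16> %v0, <i16 7967, i16 7967, i16 7967, i16 7967, i16 7967, i16 7967, i16 7967, i16 7967>
  store <8 x i16> %v1, ptr %res
  ret void
}
```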
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/ashr.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/ashr.ll
new file mode 100644
index 0000000000000000000000000000000000000000..fbc570d77ba8038a8962a52f6319c58c945611fc
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/ashr.ll
@@ -0,0 +1,178 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+define void @ashr_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: ashr_v16i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a2, 0
+; CHECK-NEXT: vld $vr1, $a1, 0
+; CHECK-NEXT: vsra.b $vr0, $vr1, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <16 x i8>, ptr %a0
+ %v1 = load <16 x i8>, ptr %a1
+ %v2 = ashr <16 x i8> %v0, %v1
+ store <16 x i8> %v2, ptr %res
+ ret void
+}
+
+define void @ashr_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: ashr_v8i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a2, 0
+; CHECK-NEXT: vld $vr1, $a1, 0
+; CHECK-NEXT: vsra.h $vr0, $vr1, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <8 x i16>, ptr %a0
+ %v1 = load <8 x i16>, ptr %a1
+ %v2 = ashr <8 x i16> %v0, %v1
+ store <8 x i16> %v2, ptr %res
+ ret void
+}
+
+define void @ashr_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: ashr_v4i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a2, 0
+; CHECK-NEXT: vld $vr1, $a1, 0
+; CHECK-NEXT: vsra.w $vr0, $vr1, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <4 x i32>, ptr %a0
+ %v1 = load <4 x i32>, ptr %a1
+ %v2 = ashr <4 x i32> %v0, %v1
+ store <4 x i32> %v2, ptr %res
+ ret void
+}
+
+define void @ashr_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: ashr_v2i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a2, 0
+; CHECK-NEXT: vld $vr1, $a1, 0
+; CHECK-NEXT: vsra.d $vr0, $vr1, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <2 x i64>, ptr %a0
+ %v1 = load <2 x i64>, ptr %a1
+ %v2 = ashr <2 x i64> %v0, %v1
+ store <2 x i64> %v2, ptr %res
+ ret void
+}
+
+define void @ashr_v16i8_1(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: ashr_v16i8_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vsrai.b $vr0, $vr0, 1
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <16 x i8>, ptr %a0
+ %v1 = ashr <16 x i8> %v0, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ store <16 x i8> %v1, ptr %res
+ ret void
+}
+
+define void @ashr_v16i8_7(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: ashr_v16i8_7:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vsrai.b $vr0, $vr0, 7
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <16 x i8>, ptr %a0
+ %v1 = ashr <16 x i8> %v0, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+ store <16 x i8> %v1, ptr %res
+ ret void
+}
+
+define void @ashr_v8i16_1(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: ashr_v8i16_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vsrai.h $vr0, $vr0, 1
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <8 x i16>, ptr %a0
+ %v1 = ashr <8 x i16> %v0, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ store <8 x i16> %v1, ptr %res
+ ret void
+}
+
+define void @ashr_v8i16_15(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: ashr_v8i16_15:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vsrai.h $vr0, $vr0, 15
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <8 x i16>, ptr %a0
+ %v1 = ashr <8 x i16> %v0, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+ store <8 x i16> %v1, ptr %res
+ ret void
+}
+
+define void @ashr_v4i32_1(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: ashr_v4i32_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vsrai.w $vr0, $vr0, 1
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <4 x i32>, ptr %a0
+ %v1 = ashr <4 x i32> %v0, <i32 1, i32 1, i32 1, i32 1>
+ store <4 x i32> %v1, ptr %res
+ ret void
+}
+
+define void @ashr_v4i32_31(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: ashr_v4i32_31:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vsrai.w $vr0, $vr0, 31
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <4 x i32>, ptr %a0
+ %v1 = ashr <4 x i32> %v0, <i32 31, i32 31, i32 31, i32 31>
+ store <4 x i32> %v1, ptr %res
+ ret void
+}
+
+define void @ashr_v2i64_1(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: ashr_v2i64_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vsrai.d $vr0, $vr0, 1
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <2 x i64>, ptr %a0
+ %v1 = ashr <2 x i64> %v0, <i64 1, i64 1>
+ store <2 x i64> %v1, ptr %res
+ ret void
+}
+
+define void @ashr_v2i64_63(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: ashr_v2i64_63:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vsrai.d $vr0, $vr0, 63
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <2 x i64>, ptr %a0
+ %v1 = ashr <2 x i64> %v0, <i64 63, i64 63>
+ store <2 x i64> %v1, ptr %res
+ ret void
+}
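The `vsrai.*` immediates above cover constant splat shifts from 1 up to the element width minus one. A uniform but non-constant amount cannot use the immediate form; presumably the scalar is broadcast into a vector register (e.g. `vreplgr2vr.b`) and the register form `vsra.b` is used. A hedged sketch of that case (not part of the patch):

```llvm
; Variable splat shift amount, built with insertelement + shufflevector;
; this cannot fold into vsrai.b and should take the register path.
define void @ashr_v16i8_var(ptr %res, ptr %a0, i8 %n) nounwind {
entry:
  %v0 = load <16 x i8>, ptr %a0
  %ins = insertelement <16 x i8> poison, i8 %n, i64 0
  %amt = shufflevector <16 x i8> %ins, <16 x i8> poison, <16 x i32> zeroinitializer
  %v1 = ashr <16 x i8> %v0, %amt
  store <16 x i8> %v1, ptr %res
  ret void
}
```

diff --git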
a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/extractelement.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/extractelement.ll new file mode 100644 index 0000000000000000000000000000000000000000..8c9f779363f819b19deb4513c519f90ea82527a0 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/extractelement.ll @@ -0,0 +1,170 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +define void @extract_16xi8(ptr %src, ptr %dst) nounwind { +; CHECK-LABEL: extract_16xi8: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a0, 0 +; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 1 +; CHECK-NEXT: st.b $a0, $a1, 0 +; CHECK-NEXT: ret + %v = load volatile <16 x i8>, ptr %src + %e = extractelement <16 x i8> %v, i32 1 + store i8 %e, ptr %dst + ret void +} + +define void @extract_8xi16(ptr %src, ptr %dst) nounwind { +; CHECK-LABEL: extract_8xi16: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a0, 0 +; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 1 +; CHECK-NEXT: st.h $a0, $a1, 0 +; CHECK-NEXT: ret + %v = load volatile <8 x i16>, ptr %src + %e = extractelement <8 x i16> %v, i32 1 + store i16 %e, ptr %dst + ret void +} + +define void @extract_4xi32(ptr %src, ptr %dst) nounwind { +; CHECK-LABEL: extract_4xi32: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a0, 0 +; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 1 +; CHECK-NEXT: st.w $a0, $a1, 0 +; CHECK-NEXT: ret + %v = load volatile <4 x i32>, ptr %src + %e = extractelement <4 x i32> %v, i32 1 + store i32 %e, ptr %dst + ret void +} + +define void @extract_2xi64(ptr %src, ptr %dst) nounwind { +; CHECK-LABEL: extract_2xi64: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a0, 0 +; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 1 +; CHECK-NEXT: st.d $a0, $a1, 0 +; CHECK-NEXT: ret + %v = load volatile <2 x i64>, ptr %src + %e = extractelement <2 x i64> %v, i32 1 + store i64 %e, ptr %dst + ret void +} + +define void @extract_4xfloat(ptr %src, ptr %dst) nounwind { +; CHECK-LABEL: extract_4xfloat: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a0, 0 +; CHECK-NEXT: vreplvei.w $vr0, $vr0, 1 +; CHECK-NEXT: fst.s $fa0, $a1, 0 +; CHECK-NEXT: ret + %v = load volatile <4 x float>, ptr %src + %e = extractelement <4 x float> %v, i32 1 + store float %e, ptr %dst + ret void +} + +define void @extract_2xdouble(ptr %src, ptr %dst) nounwind { +; CHECK-LABEL: extract_2xdouble: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a0, 0 +; CHECK-NEXT: vreplvei.d $vr0, $vr0, 1 +; CHECK-NEXT: fst.d $fa0, $a1, 0 +; CHECK-NEXT: ret + %v = load volatile <2 x double>, ptr %src + %e = extractelement <2 x double> %v, i32 1 + store double %e, ptr %dst + ret void +} + +define void @extract_16xi8_idx(ptr %src, ptr %dst, i32 %idx) nounwind { +; CHECK-LABEL: extract_16xi8_idx: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.w $a2, $a2, 0 +; CHECK-NEXT: vld $vr0, $a0, 0 +; CHECK-NEXT: vreplve.b $vr0, $vr0, $a2 +; CHECK-NEXT: movfr2gr.s $a0, $fa0 +; CHECK-NEXT: srai.w $a0, $a0, 24 +; CHECK-NEXT: st.b $a0, $a1, 0 +; CHECK-NEXT: ret + %v = load volatile <16 x i8>, ptr %src + %e = extractelement <16 x i8> %v, i32 %idx + store i8 %e, ptr %dst + ret void +} + +define void @extract_8xi16_idx(ptr %src, ptr %dst, i32 %idx) nounwind { +; CHECK-LABEL: extract_8xi16_idx: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.w $a2, $a2, 0 +; CHECK-NEXT: vld $vr0, $a0, 0 +; CHECK-NEXT: vreplve.h $vr0, $vr0, $a2 +; CHECK-NEXT: movfr2gr.s $a0, $fa0 +; CHECK-NEXT: srai.w $a0, $a0, 16 +; CHECK-NEXT: st.h $a0, $a1, 0 +; CHECK-NEXT: ret + %v = load volatile <8 x i16>, ptr %src + %e = 
extractelement <8 x i16> %v, i32 %idx + store i16 %e, ptr %dst + ret void +} + +define void @extract_4xi32_idx(ptr %src, ptr %dst, i32 %idx) nounwind { +; CHECK-LABEL: extract_4xi32_idx: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.w $a2, $a2, 0 +; CHECK-NEXT: vld $vr0, $a0, 0 +; CHECK-NEXT: vreplve.w $vr0, $vr0, $a2 +; CHECK-NEXT: movfr2gr.s $a0, $fa0 +; CHECK-NEXT: st.w $a0, $a1, 0 +; CHECK-NEXT: ret + %v = load volatile <4 x i32>, ptr %src + %e = extractelement <4 x i32> %v, i32 %idx + store i32 %e, ptr %dst + ret void +} + +define void @extract_2xi64_idx(ptr %src, ptr %dst, i32 %idx) nounwind { +; CHECK-LABEL: extract_2xi64_idx: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.w $a2, $a2, 0 +; CHECK-NEXT: vld $vr0, $a0, 0 +; CHECK-NEXT: vreplve.d $vr0, $vr0, $a2 +; CHECK-NEXT: movfr2gr.d $a0, $fa0 +; CHECK-NEXT: st.d $a0, $a1, 0 +; CHECK-NEXT: ret + %v = load volatile <2 x i64>, ptr %src + %e = extractelement <2 x i64> %v, i32 %idx + store i64 %e, ptr %dst + ret void +} + +define void @extract_4xfloat_idx(ptr %src, ptr %dst, i32 %idx) nounwind { +; CHECK-LABEL: extract_4xfloat_idx: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.w $a2, $a2, 0 +; CHECK-NEXT: vld $vr0, $a0, 0 +; CHECK-NEXT: vreplve.w $vr0, $vr0, $a2 +; CHECK-NEXT: fst.s $fa0, $a1, 0 +; CHECK-NEXT: ret + %v = load volatile <4 x float>, ptr %src + %e = extractelement <4 x float> %v, i32 %idx + store float %e, ptr %dst + ret void +} + +define void @extract_2xdouble_idx(ptr %src, ptr %dst, i32 %idx) nounwind { +; CHECK-LABEL: extract_2xdouble_idx: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.w $a2, $a2, 0 +; CHECK-NEXT: vld $vr0, $a0, 0 +; CHECK-NEXT: vreplve.d $vr0, $vr0, $a2 +; CHECK-NEXT: fst.d $fa0, $a1, 0 +; CHECK-NEXT: ret + %v = load volatile <2 x double>, ptr %src + %e = extractelement <2 x double> %v, i32 %idx + store double %e, ptr %dst + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fadd.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fadd.ll new file mode 100644 index 0000000000000000000000000000000000000000..1fa1f611c4a36c7dc881b445a93c1f0de6e79767 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fadd.ll @@ -0,0 +1,34 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +define void @fadd_v4f32(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: fadd_v4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vfadd.s $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <4 x float>, ptr %a0 + %v1 = load <4 x float>, ptr %a1 + %v2 = fadd <4 x float> %v0, %v1 + store <4 x float> %v2, ptr %res + ret void +} + +define void @fadd_v2f64(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: fadd_v2f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vfadd.d $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <2 x double>, ptr %a0 + %v1 = load <2 x double>, ptr %a1 + %v2 = fadd <2 x double> %v0, %v1 + store <2 x double> %v2, ptr %res + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fcmp.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fcmp.ll new file mode 100644 index 0000000000000000000000000000000000000000..53fbf0b2f86fe43c363e29b321c5956782d3f536 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fcmp.ll @@ -0,0 +1,692 @@ +; NOTE: Assertions have been 
autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+;; TRUE
+define void @v4f32_fcmp_true(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: v4f32_fcmp_true:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vrepli.b $vr0, -1
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <4 x float>, ptr %a0
+ %v1 = load <4 x float>, ptr %a1
+ %cmp = fcmp true <4 x float> %v0, %v1
+ %ext = sext <4 x i1> %cmp to <4 x i32>
+ store <4 x i32> %ext, ptr %res
+ ret void
+}
+
+;; FALSE
+define void @v2f64_fcmp_false(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: v2f64_fcmp_false:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vrepli.b $vr0, 0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <2 x double>, ptr %a0
+ %v1 = load <2 x double>, ptr %a1
+ %cmp = fcmp false <2 x double> %v0, %v1
+ %ext = sext <2 x i1> %cmp to <2 x i64>
+ store <2 x i64> %ext, ptr %res
+ ret void
+}
+
+;; SETOEQ
+define void @v4f32_fcmp_oeq(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: v4f32_fcmp_oeq:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vld $vr0, $a2, 0
+; CHECK-NEXT: vld $vr1, $a1, 0
+; CHECK-NEXT: vfcmp.ceq.s $vr0, $vr1, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <4 x float>, ptr %a0
+ %v1 = load <4 x float>, ptr %a1
+ %cmp = fcmp oeq <4 x float> %v0, %v1
+ %ext = sext <4 x i1> %cmp to <4 x i32>
+ store <4 x i32> %ext, ptr %res
+ ret void
+}
+
+define void @v2f64_fcmp_oeq(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: v2f64_fcmp_oeq:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vld $vr0, $a2, 0
+; CHECK-NEXT: vld $vr1, $a1, 0
+; CHECK-NEXT: vfcmp.ceq.d $vr0, $vr1, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <2 x double>, ptr %a0
+ %v1 = load <2 x double>, ptr %a1
+ %cmp = fcmp oeq <2 x double> %v0, %v1
+ %ext = sext <2 x i1> %cmp to <2 x i64>
+ store <2 x i64> %ext, ptr %res
+ ret void
+}
+
+;; SETUEQ
+define void @v4f32_fcmp_ueq(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: v4f32_fcmp_ueq:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vld $vr0, $a2, 0
+; CHECK-NEXT: vld $vr1, $a1, 0
+; CHECK-NEXT: vfcmp.cueq.s $vr0, $vr1, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <4 x float>, ptr %a0
+ %v1 = load <4 x float>, ptr %a1
+ %cmp = fcmp ueq <4 x float> %v0, %v1
+ %ext = sext <4 x i1> %cmp to <4 x i32>
+ store <4 x i32> %ext, ptr %res
+ ret void
+}
+
+define void @v2f64_fcmp_ueq(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: v2f64_fcmp_ueq:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vld $vr0, $a2, 0
+; CHECK-NEXT: vld $vr1, $a1, 0
+; CHECK-NEXT: vfcmp.cueq.d $vr0, $vr1, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <2 x double>, ptr %a0
+ %v1 = load <2 x double>, ptr %a1
+ %cmp = fcmp ueq <2 x double> %v0, %v1
+ %ext = sext <2 x i1> %cmp to <2 x i64>
+ store <2 x i64> %ext, ptr %res
+ ret void
+}
+
+;; SETEQ
+define void @v4f32_fcmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: v4f32_fcmp_eq:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vld $vr0, $a2, 0
+; CHECK-NEXT: vld $vr1, $a1, 0
+; CHECK-NEXT: vfcmp.ceq.s $vr0, $vr1, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <4 x float>, ptr %a0
+ %v1 = load <4 x float>, ptr %a1
+ %cmp = fcmp fast oeq <4 x float> %v0, %v1
+ %ext = sext <4 x i1> %cmp to <4 x i32>
+ store <4 x i32> %ext, ptr %res
+ ret void
+}
+
+define void @v2f64_fcmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: v2f64_fcmp_eq:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vld $vr0, $a2, 0
+; CHECK-NEXT: vld $vr1, $a1, 0
+;
CHECK-NEXT: vfcmp.ceq.d $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <2 x double>, ptr %a0 + %v1 = load <2 x double>, ptr %a1 + %cmp = fcmp fast ueq <2 x double> %v0, %v1 + %ext = sext <2 x i1> %cmp to <2 x i64> + store <2 x i64> %ext, ptr %res + ret void +} + +;; SETOLE +define void @v4f32_fcmp_ole(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4f32_fcmp_ole: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vfcmp.cle.s $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x float>, ptr %a0 + %v1 = load <4 x float>, ptr %a1 + %cmp = fcmp ole <4 x float> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i32> + store <4 x i32> %ext, ptr %res + ret void +} + +define void @v2f64_fcmp_ole(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v2f64_fcmp_ole: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vfcmp.cle.d $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <2 x double>, ptr %a0 + %v1 = load <2 x double>, ptr %a1 + %cmp = fcmp ole <2 x double> %v0, %v1 + %ext = sext <2 x i1> %cmp to <2 x i64> + store <2 x i64> %ext, ptr %res + ret void +} + +;; SETULE +define void @v4f32_fcmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4f32_fcmp_ule: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vfcmp.cule.s $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x float>, ptr %a0 + %v1 = load <4 x float>, ptr %a1 + %cmp = fcmp ule <4 x float> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i32> + store <4 x i32> %ext, ptr %res + ret void +} + +define void @v2f64_fcmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v2f64_fcmp_ule: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vfcmp.cule.d $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <2 x double>, ptr %a0 + %v1 = load <2 x double>, ptr %a1 + %cmp = fcmp ule <2 x double> %v0, %v1 + %ext = sext <2 x i1> %cmp to <2 x i64> + store <2 x i64> %ext, ptr %res + ret void +} + +;; SETLE +define void @v4f32_fcmp_le(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4f32_fcmp_le: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vfcmp.cle.s $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x float>, ptr %a0 + %v1 = load <4 x float>, ptr %a1 + %cmp = fcmp fast ole <4 x float> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i32> + store <4 x i32> %ext, ptr %res + ret void +} + +define void @v2f64_fcmp_le(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v2f64_fcmp_le: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vfcmp.cle.d $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <2 x double>, ptr %a0 + %v1 = load <2 x double>, ptr %a1 + %cmp = fcmp fast ule <2 x double> %v0, %v1 + %ext = sext <2 x i1> %cmp to <2 x i64> + store <2 x i64> %ext, ptr %res + ret void +} + +;; SETOLT +define void @v4f32_fcmp_olt(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4f32_fcmp_olt: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vfcmp.clt.s $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x float>, ptr %a0 + %v1 = load <4 x float>, ptr %a1 + 
%cmp = fcmp olt <4 x float> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i32> + store <4 x i32> %ext, ptr %res + ret void +} + +define void @v2f64_fcmp_olt(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v2f64_fcmp_olt: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vfcmp.clt.d $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <2 x double>, ptr %a0 + %v1 = load <2 x double>, ptr %a1 + %cmp = fcmp olt <2 x double> %v0, %v1 + %ext = sext <2 x i1> %cmp to <2 x i64> + store <2 x i64> %ext, ptr %res + ret void +} + +;; SETULT +define void @v4f32_fcmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4f32_fcmp_ult: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vfcmp.cult.s $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x float>, ptr %a0 + %v1 = load <4 x float>, ptr %a1 + %cmp = fcmp ult <4 x float> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i32> + store <4 x i32> %ext, ptr %res + ret void +} + +define void @v2f64_fcmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v2f64_fcmp_ult: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vfcmp.cult.d $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <2 x double>, ptr %a0 + %v1 = load <2 x double>, ptr %a1 + %cmp = fcmp ult <2 x double> %v0, %v1 + %ext = sext <2 x i1> %cmp to <2 x i64> + store <2 x i64> %ext, ptr %res + ret void +} + +;; SETLT +define void @v4f32_fcmp_lt(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4f32_fcmp_lt: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vfcmp.clt.s $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x float>, ptr %a0 + %v1 = load <4 x float>, ptr %a1 + %cmp = fcmp fast olt <4 x float> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i32> + store <4 x i32> %ext, ptr %res + ret void +} + +define void @v2f64_fcmp_lt(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v2f64_fcmp_lt: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vfcmp.clt.d $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <2 x double>, ptr %a0 + %v1 = load <2 x double>, ptr %a1 + %cmp = fcmp fast ult <2 x double> %v0, %v1 + %ext = sext <2 x i1> %cmp to <2 x i64> + store <2 x i64> %ext, ptr %res + ret void +} + +;; SETONE +define void @v4f32_fcmp_one(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4f32_fcmp_one: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vfcmp.cne.s $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x float>, ptr %a0 + %v1 = load <4 x float>, ptr %a1 + %cmp = fcmp one <4 x float> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i32> + store <4 x i32> %ext, ptr %res + ret void +} + +define void @v2f64_fcmp_one(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v2f64_fcmp_one: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vfcmp.cne.d $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <2 x double>, ptr %a0 + %v1 = load <2 x double>, ptr %a1 + %cmp = fcmp one <2 x double> %v0, %v1 + %ext = sext <2 x i1> %cmp to <2 x i64> + store <2 x i64> %ext, ptr %res + ret void +} + +;; SETUNE +define void @v4f32_fcmp_une(ptr %res, 
ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4f32_fcmp_une: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vfcmp.cune.s $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x float>, ptr %a0 + %v1 = load <4 x float>, ptr %a1 + %cmp = fcmp une <4 x float> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i32> + store <4 x i32> %ext, ptr %res + ret void +} + +define void @v2f64_fcmp_une(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v2f64_fcmp_une: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vfcmp.cune.d $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <2 x double>, ptr %a0 + %v1 = load <2 x double>, ptr %a1 + %cmp = fcmp une <2 x double> %v0, %v1 + %ext = sext <2 x i1> %cmp to <2 x i64> + store <2 x i64> %ext, ptr %res + ret void +} + +;; SETNE +define void @v4f32_fcmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4f32_fcmp_ne: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vfcmp.cne.s $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x float>, ptr %a0 + %v1 = load <4 x float>, ptr %a1 + %cmp = fcmp fast one <4 x float> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i32> + store <4 x i32> %ext, ptr %res + ret void +} + +define void @v2f64_fcmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v2f64_fcmp_ne: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vfcmp.cne.d $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <2 x double>, ptr %a0 + %v1 = load <2 x double>, ptr %a1 + %cmp = fcmp fast une <2 x double> %v0, %v1 + %ext = sext <2 x i1> %cmp to <2 x i64> + store <2 x i64> %ext, ptr %res + ret void +} + +;; SETO +define void @v4f32_fcmp_ord(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4f32_fcmp_ord: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vfcmp.cor.s $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x float>, ptr %a0 + %v1 = load <4 x float>, ptr %a1 + %cmp = fcmp ord <4 x float> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i32> + store <4 x i32> %ext, ptr %res + ret void +} + +define void @v2f64_fcmp_ord(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v2f64_fcmp_ord: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vfcmp.cor.d $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <2 x double>, ptr %a0 + %v1 = load <2 x double>, ptr %a1 + %cmp = fcmp ord <2 x double> %v0, %v1 + %ext = sext <2 x i1> %cmp to <2 x i64> + store <2 x i64> %ext, ptr %res + ret void +} + +;; SETUO +define void @v4f32_fcmp_uno(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4f32_fcmp_uno: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vfcmp.cun.s $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x float>, ptr %a0 + %v1 = load <4 x float>, ptr %a1 + %cmp = fcmp uno <4 x float> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i32> + store <4 x i32> %ext, ptr %res + ret void +} + +define void @v2f64_fcmp_uno(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v2f64_fcmp_uno: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vfcmp.cun.d $vr0, $vr1, 
$vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <2 x double>, ptr %a0 + %v1 = load <2 x double>, ptr %a1 + %cmp = fcmp uno <2 x double> %v0, %v1 + %ext = sext <2 x i1> %cmp to <2 x i64> + store <2 x i64> %ext, ptr %res + ret void +} + +;; Expand SETOGT +define void @v4f32_fcmp_ogt(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4f32_fcmp_ogt: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: vfcmp.clt.s $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x float>, ptr %a0 + %v1 = load <4 x float>, ptr %a1 + %cmp = fcmp ogt <4 x float> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i32> + store <4 x i32> %ext, ptr %res + ret void +} + +define void @v2f64_fcmp_ogt(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v2f64_fcmp_ogt: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: vfcmp.clt.d $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <2 x double>, ptr %a0 + %v1 = load <2 x double>, ptr %a1 + %cmp = fcmp ogt <2 x double> %v0, %v1 + %ext = sext <2 x i1> %cmp to <2 x i64> + store <2 x i64> %ext, ptr %res + ret void +} + +;; Expand SETUGT +define void @v4f32_fcmp_ugt(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4f32_fcmp_ugt: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: vfcmp.cult.s $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x float>, ptr %a0 + %v1 = load <4 x float>, ptr %a1 + %cmp = fcmp ugt <4 x float> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i32> + store <4 x i32> %ext, ptr %res + ret void +} + +define void @v2f64_fcmp_ugt(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v2f64_fcmp_ugt: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: vfcmp.cult.d $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <2 x double>, ptr %a0 + %v1 = load <2 x double>, ptr %a1 + %cmp = fcmp ugt <2 x double> %v0, %v1 + %ext = sext <2 x i1> %cmp to <2 x i64> + store <2 x i64> %ext, ptr %res + ret void +} + +;; Expand SETGT +define void @v4f32_fcmp_gt(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4f32_fcmp_gt: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: vfcmp.clt.s $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x float>, ptr %a0 + %v1 = load <4 x float>, ptr %a1 + %cmp = fcmp fast ogt <4 x float> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i32> + store <4 x i32> %ext, ptr %res + ret void +} + +define void @v2f64_fcmp_gt(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v2f64_fcmp_gt: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: vfcmp.clt.d $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <2 x double>, ptr %a0 + %v1 = load <2 x double>, ptr %a1 + %cmp = fcmp fast ugt <2 x double> %v0, %v1 + %ext = sext <2 x i1> %cmp to <2 x i64> + store <2 x i64> %ext, ptr %res + ret void +} + +;; Expand SETOGE +define void @v4f32_fcmp_oge(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4f32_fcmp_oge: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: vfcmp.cle.s $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x float>, ptr %a0 + %v1 = load <4 x float>, ptr %a1 + %cmp = fcmp 
oge <4 x float> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i32> + store <4 x i32> %ext, ptr %res + ret void +} + +define void @v2f64_fcmp_oge(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v2f64_fcmp_oge: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: vfcmp.cle.d $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <2 x double>, ptr %a0 + %v1 = load <2 x double>, ptr %a1 + %cmp = fcmp oge <2 x double> %v0, %v1 + %ext = sext <2 x i1> %cmp to <2 x i64> + store <2 x i64> %ext, ptr %res + ret void +} + +;; Expand SETUGE +define void @v4f32_fcmp_uge(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4f32_fcmp_uge: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: vfcmp.cule.s $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x float>, ptr %a0 + %v1 = load <4 x float>, ptr %a1 + %cmp = fcmp uge <4 x float> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i32> + store <4 x i32> %ext, ptr %res + ret void +} + +define void @v2f64_fcmp_uge(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v2f64_fcmp_uge: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: vfcmp.cule.d $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <2 x double>, ptr %a0 + %v1 = load <2 x double>, ptr %a1 + %cmp = fcmp uge <2 x double> %v0, %v1 + %ext = sext <2 x i1> %cmp to <2 x i64> + store <2 x i64> %ext, ptr %res + ret void +} + +;; Expand SETGE +define void @v4f32_fcmp_ge(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4f32_fcmp_ge: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: vfcmp.cle.s $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x float>, ptr %a0 + %v1 = load <4 x float>, ptr %a1 + %cmp = fcmp fast oge <4 x float> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i32> + store <4 x i32> %ext, ptr %res + ret void +} + +define void @v2f64_fcmp_ge(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v2f64_fcmp_ge: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: vfcmp.cle.d $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <2 x double>, ptr %a0 + %v1 = load <2 x double>, ptr %a1 + %cmp = fcmp fast uge <2 x double> %v0, %v1 + %ext = sext <2 x i1> %cmp to <2 x i64> + store <2 x i64> %ext, ptr %res + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fdiv.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fdiv.ll new file mode 100644 index 0000000000000000000000000000000000000000..5f1ee9e4d212ebc5e68702d6928ef7d28f53064e --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fdiv.ll @@ -0,0 +1,63 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +define void @fdiv_v4f32(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: fdiv_v4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vfdiv.s $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <4 x float>, ptr %a0 + %v1 = load <4 x float>, ptr %a1 + %v2 = fdiv <4 x float> %v0, %v1 + store <4 x float> %v2, ptr %res + ret void +} + +define void @fdiv_v2f64(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: 
fdiv_v2f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a2, 0
+; CHECK-NEXT: vld $vr1, $a1, 0
+; CHECK-NEXT: vfdiv.d $vr0, $vr1, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <2 x double>, ptr %a0
+ %v1 = load <2 x double>, ptr %a1
+ %v2 = fdiv <2 x double> %v0, %v1
+ store <2 x double> %v2, ptr %res
+ ret void
+}
+
+;; 1.0 / vec
+define void @one_fdiv_v4f32(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: one_fdiv_v4f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vfrecip.s $vr0, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <4 x float>, ptr %a0
+ %div = fdiv <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %v0
+ store <4 x float> %div, ptr %res
+ ret void
+}
+
+define void @one_fdiv_v2f64(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: one_fdiv_v2f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vfrecip.d $vr0, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <2 x double>, ptr %a0
+ %div = fdiv <2 x double> <double 1.0, double 1.0>, %v0
+ store <2 x double> %div, ptr %res
+ ret void
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fmul.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fmul.ll
new file mode 100644
index 0000000000000000000000000000000000000000..e7fb527f7805e81b981bc8ec9d0e6405a1e77f5c
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fmul.ll
@@ -0,0 +1,34 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+define void @fmul_v4f32(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: fmul_v4f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a2, 0
+; CHECK-NEXT: vld $vr1, $a1, 0
+; CHECK-NEXT: vfmul.s $vr0, $vr1, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <4 x float>, ptr %a0
+ %v1 = load <4 x float>, ptr %a1
+ %v2 = fmul <4 x float> %v0, %v1
+ store <4 x float> %v2, ptr %res
+ ret void
+}
+
+define void @fmul_v2f64(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: fmul_v2f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a2, 0
+; CHECK-NEXT: vld $vr1, $a1, 0
+; CHECK-NEXT: vfmul.d $vr0, $vr1, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <2 x double>, ptr %a0
+ %v1 = load <2 x double>, ptr %a1
+ %v2 = fmul <2 x double> %v0, %v1
+ store <2 x double> %v2, ptr %res
+ ret void
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fneg.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fneg.ll
new file mode 100644
index 0000000000000000000000000000000000000000..795c1ac8b368451c72dbe8f0bc8657d1335b64c0
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fneg.ll
@@ -0,0 +1,29 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+define void @fneg_v4f32(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: fneg_v4f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vbitrevi.w $vr0, $vr0, 31
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <4 x float>, ptr %a0
+ %v1 = fneg <4 x float> %v0
+ store <4 x float> %v1, ptr %res
+ ret void
+}
+define void @fneg_v2f64(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: fneg_v2f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vbitrevi.d $vr0, $vr0, 63
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load
<2 x double>, ptr %a0 + %v1 = fneg <2 x double> %v0 + store <2 x double> %v1, ptr %res + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fptosi.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fptosi.ll new file mode 100644 index 0000000000000000000000000000000000000000..c3008fe96e47dacde5cbfab667e7f2387c23e0db --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fptosi.ll @@ -0,0 +1,28 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +define void @fptosi_v4f32_v4i32(ptr %res, ptr %in){ +; CHECK-LABEL: fptosi_v4f32_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vftintrz.w.s $vr0, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x float>, ptr %in + %v1 = fptosi <4 x float> %v0 to <4 x i32> + store <4 x i32> %v1, ptr %res + ret void +} + +define void @fptosi_v2f64_v2i64(ptr %res, ptr %in){ +; CHECK-LABEL: fptosi_v2f64_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vftintrz.l.d $vr0, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <2 x double>, ptr %in + %v1 = fptosi <2 x double> %v0 to <2 x i64> + store <2 x i64> %v1, ptr %res + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fptoui.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fptoui.ll new file mode 100644 index 0000000000000000000000000000000000000000..f0aeb0bd14e75ee9558b99c3ac91836fab8c69cc --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fptoui.ll @@ -0,0 +1,28 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +define void @fptoui_v4f32_v4i32(ptr %res, ptr %in){ +; CHECK-LABEL: fptoui_v4f32_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vftintrz.wu.s $vr0, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x float>, ptr %in + %v1 = fptoui <4 x float> %v0 to <4 x i32> + store <4 x i32> %v1, ptr %res + ret void +} + +define void @fptoui_v2f64_v2i64(ptr %res, ptr %in){ +; CHECK-LABEL: fptoui_v2f64_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vftintrz.lu.d $vr0, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <2 x double>, ptr %in + %v1 = fptoui <2 x double> %v0 to <2 x i64> + store <2 x i64> %v1, ptr %res + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fsub.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fsub.ll new file mode 100644 index 0000000000000000000000000000000000000000..df98182321dab9b5700e8310adcdcc4632cf47d1 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/fsub.ll @@ -0,0 +1,34 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +define void @fsub_v4f32(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: fsub_v4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vfsub.s $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <4 x float>, ptr %a0 + %v1 = load <4 x float>, ptr %a1 + %v2 = fsub <4 x float> %v0, %v1 + store <4 x float> %v2, ptr %res + ret void +} + +define void @fsub_v2f64(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: 
fsub_v2f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a2, 0
+; CHECK-NEXT: vld $vr1, $a1, 0
+; CHECK-NEXT: vfsub.d $vr0, $vr1, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <2 x double>, ptr %a0
+ %v1 = load <2 x double>, ptr %a1
+ %v2 = fsub <2 x double> %v0, %v1
+ store <2 x double> %v2, ptr %res
+ ret void
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/icmp.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/icmp.ll
new file mode 100644
index 0000000000000000000000000000000000000000..448f3fa6c6e0e494b2b72cffee64fa5b2e787d16
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/icmp.ll
@@ -0,0 +1,939 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+;; SETEQ
+define void @v16i8_icmp_eq_imm(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: v16i8_icmp_eq_imm:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vseqi.b $vr0, $vr0, 15
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <16 x i8>, ptr %a0
+ %cmp = icmp eq <16 x i8> %v0, <i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15>
+ %ext = sext <16 x i1> %cmp to <16 x i8>
+ store <16 x i8> %ext, ptr %res
+ ret void
+}
+
+define void @v16i8_icmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: v16i8_icmp_eq:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vld $vr0, $a2, 0
+; CHECK-NEXT: vld $vr1, $a1, 0
+; CHECK-NEXT: vseq.b $vr0, $vr1, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <16 x i8>, ptr %a0
+ %v1 = load <16 x i8>, ptr %a1
+ %cmp = icmp eq <16 x i8> %v0, %v1
+ %ext = sext <16 x i1> %cmp to <16 x i8>
+ store <16 x i8> %ext, ptr %res
+ ret void
+}
+
+define void @v8i16_icmp_eq_imm(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: v8i16_icmp_eq_imm:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vseqi.h $vr0, $vr0, 15
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <8 x i16>, ptr %a0
+ %cmp = icmp eq <8 x i16> %v0, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+ %ext = sext <8 x i1> %cmp to <8 x i16>
+ store <8 x i16> %ext, ptr %res
+ ret void
+}
+
+define void @v8i16_icmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: v8i16_icmp_eq:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vld $vr0, $a2, 0
+; CHECK-NEXT: vld $vr1, $a1, 0
+; CHECK-NEXT: vseq.h $vr0, $vr1, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <8 x i16>, ptr %a0
+ %v1 = load <8 x i16>, ptr %a1
+ %cmp = icmp eq <8 x i16> %v0, %v1
+ %ext = sext <8 x i1> %cmp to <8 x i16>
+ store <8 x i16> %ext, ptr %res
+ ret void
+}
+
+define void @v4i32_icmp_eq_imm(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: v4i32_icmp_eq_imm:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vseqi.w $vr0, $vr0, 15
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <4 x i32>, ptr %a0
+ %cmp = icmp eq <4 x i32> %v0, <i32 15, i32 15, i32 15, i32 15>
+ %ext = sext <4 x i1> %cmp to <4 x i32>
+ store <4 x i32> %ext, ptr %res
+ ret void
+}
+
+define void @v4i32_icmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: v4i32_icmp_eq:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vld $vr0, $a2, 0
+; CHECK-NEXT: vld $vr1, $a1, 0
+; CHECK-NEXT: vseq.w $vr0, $vr1, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <4 x i32>, ptr %a0
+ %v1 = load <4 x i32>, ptr %a1
+ %cmp = icmp eq <4 x i32> %v0, %v1
+ %ext = sext <4 x i1> %cmp to <4 x i32>
+ store <4 x i32> %ext, ptr %res
+ ret void
+}
+
+define void @v2i64_icmp_eq_imm(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: v2i64_icmp_eq_imm:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vseqi.d $vr0, $vr0, 15
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <2 x i64>, ptr %a0
+ %cmp = icmp eq <2 x i64> %v0, <i64 15, i64 15>
+ %ext = sext <2 x i1> %cmp to <2 x i64>
+ store <2 x i64> %ext, ptr %res
+ ret void
+}
+
+define void @v2i64_icmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: v2i64_icmp_eq:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vld $vr0, $a2, 0
+; CHECK-NEXT: vld $vr1, $a1, 0
+; CHECK-NEXT: vseq.d $vr0, $vr1, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <2 x i64>, ptr %a0
+ %v1 = load <2 x i64>, ptr %a1
+ %cmp = icmp eq <2 x i64> %v0, %v1
+ %ext = sext <2 x i1> %cmp to <2 x i64>
+ store <2 x i64> %ext, ptr %res
+ ret void
+}
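`vseqi.*` takes a signed 5-bit immediate, so the splat of 15 used throughout the `*_imm` cases above is the largest value that still folds; with 16 one would expect the constant to be materialized and the register form `vseq.*` selected instead. A hedged sketch (illustrative function name, not part of the patch):

```llvm
; Sketch under the si5 assumption: 16 lies outside vseqi.b's immediate range.
define void @v16i8_icmp_eq_imm_16(ptr %res, ptr %a0) nounwind {
  %v0 = load <16 x i8>, ptr %a0
  %cmp = icmp eq <16 x i8> %v0, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
  %ext = sext <16 x i1> %cmp to <16 x i8>
  store <16 x i8> %ext, ptr %res
  ret void
}
```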
CHECK-NEXT: vseqi.d $vr0, $vr0, 15 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <2 x i64>, ptr %a0 + %cmp = icmp eq <2 x i64> %v0, <i64 15, i64 15> + %ext = sext <2 x i1> %cmp to <2 x i64> + store <2 x i64> %ext, ptr %res + ret void +} + +define void @v2i64_icmp_eq(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v2i64_icmp_eq: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vseq.d $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <2 x i64>, ptr %a0 + %v1 = load <2 x i64>, ptr %a1 + %cmp = icmp eq <2 x i64> %v0, %v1 + %ext = sext <2 x i1> %cmp to <2 x i64> + store <2 x i64> %ext, ptr %res + ret void +} + +;; SETLE +define void @v16i8_icmp_sle_imm(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: v16i8_icmp_sle_imm: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vslei.b $vr0, $vr0, 15 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <16 x i8>, ptr %a0 + %cmp = icmp sle <16 x i8> %v0, <i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15> + %ext = sext <16 x i1> %cmp to <16 x i8> + store <16 x i8> %ext, ptr %res + ret void +} + +define void @v16i8_icmp_sle(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v16i8_icmp_sle: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vsle.b $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <16 x i8>, ptr %a0 + %v1 = load <16 x i8>, ptr %a1 + %cmp = icmp sle <16 x i8> %v0, %v1 + %ext = sext <16 x i1> %cmp to <16 x i8> + store <16 x i8> %ext, ptr %res + ret void +} + +define void @v8i16_icmp_sle_imm(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: v8i16_icmp_sle_imm: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vslei.h $vr0, $vr0, 15 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x i16>, ptr %a0 + %cmp = icmp sle <8 x i16> %v0, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15> + %ext = sext <8 x i1> %cmp to <8 x i16> + store <8 x i16> %ext, ptr %res + ret void +} + +define void @v8i16_icmp_sle(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v8i16_icmp_sle: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vsle.h $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x i16>, ptr %a0 + %v1 = load <8 x i16>, ptr %a1 + %cmp = icmp sle <8 x i16> %v0, %v1 + %ext = sext <8 x i1> %cmp to <8 x i16> + store <8 x i16> %ext, ptr %res + ret void +} + +define void @v4i32_icmp_sle_imm(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: v4i32_icmp_sle_imm: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vslei.w $vr0, $vr0, 15 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x i32>, ptr %a0 + %cmp = icmp sle <4 x i32> %v0, <i32 15, i32 15, i32 15, i32 15> + %ext = sext <4 x i1> %cmp to <4 x i32> + store <4 x i32> %ext, ptr %res + ret void +} + +define void @v4i32_icmp_sle(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4i32_icmp_sle: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vsle.w $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x i32>, ptr %a0 + %v1 = load <4 x i32>, ptr %a1 + %cmp = icmp sle <4 x i32> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i32> + store <4 x i32> %ext, ptr %res + ret void +} + +define void @v2i64_icmp_sle_imm(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: v2i64_icmp_sle_imm: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vslei.d $vr0, $vr0, 15 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <2 x i64>, ptr
%a0 + %cmp = icmp sle <2 x i64> %v0, <i64 15, i64 15> + %ext = sext <2 x i1> %cmp to <2 x i64> + store <2 x i64> %ext, ptr %res + ret void +} + +define void @v2i64_icmp_sle(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v2i64_icmp_sle: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vsle.d $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <2 x i64>, ptr %a0 + %v1 = load <2 x i64>, ptr %a1 + %cmp = icmp sle <2 x i64> %v0, %v1 + %ext = sext <2 x i1> %cmp to <2 x i64> + store <2 x i64> %ext, ptr %res + ret void +} + +;; SETULE +define void @v16i8_icmp_ule_imm(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: v16i8_icmp_ule_imm: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vslei.bu $vr0, $vr0, 31 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <16 x i8>, ptr %a0 + %cmp = icmp ule <16 x i8> %v0, <i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31> + %ext = sext <16 x i1> %cmp to <16 x i8> + store <16 x i8> %ext, ptr %res + ret void +} + +define void @v16i8_icmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v16i8_icmp_ule: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vsle.bu $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <16 x i8>, ptr %a0 + %v1 = load <16 x i8>, ptr %a1 + %cmp = icmp ule <16 x i8> %v0, %v1 + %ext = sext <16 x i1> %cmp to <16 x i8> + store <16 x i8> %ext, ptr %res + ret void +} + +define void @v8i16_icmp_ule_imm(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: v8i16_icmp_ule_imm: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vslei.hu $vr0, $vr0, 31 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x i16>, ptr %a0 + %cmp = icmp ule <8 x i16> %v0, <i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31> + %ext = sext <8 x i1> %cmp to <8 x i16> + store <8 x i16> %ext, ptr %res + ret void +} + +define void @v8i16_icmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v8i16_icmp_ule: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vsle.hu $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x i16>, ptr %a0 + %v1 = load <8 x i16>, ptr %a1 + %cmp = icmp ule <8 x i16> %v0, %v1 + %ext = sext <8 x i1> %cmp to <8 x i16> + store <8 x i16> %ext, ptr %res + ret void +} + +define void @v4i32_icmp_ule_imm(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: v4i32_icmp_ule_imm: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vslei.wu $vr0, $vr0, 31 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x i32>, ptr %a0 + %cmp = icmp ule <4 x i32> %v0, <i32 31, i32 31, i32 31, i32 31> + %ext = sext <4 x i1> %cmp to <4 x i32> + store <4 x i32> %ext, ptr %res + ret void +} + +define void @v4i32_icmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4i32_icmp_ule: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vsle.wu $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x i32>, ptr %a0 + %v1 = load <4 x i32>, ptr %a1 + %cmp = icmp ule <4 x i32> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i32> + store <4 x i32> %ext, ptr %res + ret void +} + +define void @v2i64_icmp_ule_imm(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: v2i64_icmp_ule_imm: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vslei.du $vr0, $vr0, 31 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <2 x i64>, ptr %a0 + %cmp = icmp ule <2 x i64> %v0, <i64 31, i64 31> + %ext = sext <2 x i1> %cmp to <2 x i64> + store <2 x i64> %ext,
ptr %res + ret void +} + +define void @v2i64_icmp_ule(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v2i64_icmp_ule: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vsle.du $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <2 x i64>, ptr %a0 + %v1 = load <2 x i64>, ptr %a1 + %cmp = icmp ule <2 x i64> %v0, %v1 + %ext = sext <2 x i1> %cmp to <2 x i64> + store <2 x i64> %ext, ptr %res + ret void +} + +;; SETLT +define void @v16i8_icmp_slt_imm(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: v16i8_icmp_slt_imm: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vslti.b $vr0, $vr0, 15 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <16 x i8>, ptr %a0 + %cmp = icmp slt <16 x i8> %v0, <i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15> + %ext = sext <16 x i1> %cmp to <16 x i8> + store <16 x i8> %ext, ptr %res + ret void +} + +define void @v16i8_icmp_slt(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v16i8_icmp_slt: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vslt.b $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <16 x i8>, ptr %a0 + %v1 = load <16 x i8>, ptr %a1 + %cmp = icmp slt <16 x i8> %v0, %v1 + %ext = sext <16 x i1> %cmp to <16 x i8> + store <16 x i8> %ext, ptr %res + ret void +} + +define void @v8i16_icmp_slt_imm(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: v8i16_icmp_slt_imm: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vslti.h $vr0, $vr0, 15 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x i16>, ptr %a0 + %cmp = icmp slt <8 x i16> %v0, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15> + %ext = sext <8 x i1> %cmp to <8 x i16> + store <8 x i16> %ext, ptr %res + ret void +} + +define void @v8i16_icmp_slt(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v8i16_icmp_slt: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vslt.h $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x i16>, ptr %a0 + %v1 = load <8 x i16>, ptr %a1 + %cmp = icmp slt <8 x i16> %v0, %v1 + %ext = sext <8 x i1> %cmp to <8 x i16> + store <8 x i16> %ext, ptr %res + ret void +} + +define void @v4i32_icmp_slt_imm(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: v4i32_icmp_slt_imm: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vslti.w $vr0, $vr0, 15 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x i32>, ptr %a0 + %cmp = icmp slt <4 x i32> %v0, <i32 15, i32 15, i32 15, i32 15> + %ext = sext <4 x i1> %cmp to <4 x i32> + store <4 x i32> %ext, ptr %res + ret void +} + +define void @v4i32_icmp_slt(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4i32_icmp_slt: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vslt.w $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x i32>, ptr %a0 + %v1 = load <4 x i32>, ptr %a1 + %cmp = icmp slt <4 x i32> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i32> + store <4 x i32> %ext, ptr %res + ret void +} + +define void @v2i64_icmp_slt_imm(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: v2i64_icmp_slt_imm: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vslti.d $vr0, $vr0, 15 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <2 x i64>, ptr %a0 + %cmp = icmp slt <2 x i64> %v0, <i64 15, i64 15> + %ext = sext <2 x i1> %cmp to <2 x i64> + store <2 x i64> %ext, ptr %res + ret void +} + +define void @v2i64_icmp_slt(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL:
v2i64_icmp_slt: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vslt.d $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <2 x i64>, ptr %a0 + %v1 = load <2 x i64>, ptr %a1 + %cmp = icmp slt <2 x i64> %v0, %v1 + %ext = sext <2 x i1> %cmp to <2 x i64> + store <2 x i64> %ext, ptr %res + ret void +} + +;; SETULT +define void @v16i8_icmp_ult_imm(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: v16i8_icmp_ult_imm: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vslti.bu $vr0, $vr0, 31 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <16 x i8>, ptr %a0 + %cmp = icmp ult <16 x i8> %v0, <i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31> + %ext = sext <16 x i1> %cmp to <16 x i8> + store <16 x i8> %ext, ptr %res + ret void +} + +define void @v16i8_icmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v16i8_icmp_ult: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vslt.bu $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <16 x i8>, ptr %a0 + %v1 = load <16 x i8>, ptr %a1 + %cmp = icmp ult <16 x i8> %v0, %v1 + %ext = sext <16 x i1> %cmp to <16 x i8> + store <16 x i8> %ext, ptr %res + ret void +} + +define void @v8i16_icmp_ult_imm(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: v8i16_icmp_ult_imm: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vslti.hu $vr0, $vr0, 31 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x i16>, ptr %a0 + %cmp = icmp ult <8 x i16> %v0, <i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31> + %ext = sext <8 x i1> %cmp to <8 x i16> + store <8 x i16> %ext, ptr %res + ret void +} + +define void @v8i16_icmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v8i16_icmp_ult: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vslt.hu $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x i16>, ptr %a0 + %v1 = load <8 x i16>, ptr %a1 + %cmp = icmp ult <8 x i16> %v0, %v1 + %ext = sext <8 x i1> %cmp to <8 x i16> + store <8 x i16> %ext, ptr %res + ret void +} + +define void @v4i32_icmp_ult_imm(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: v4i32_icmp_ult_imm: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vslti.wu $vr0, $vr0, 31 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x i32>, ptr %a0 + %cmp = icmp ult <4 x i32> %v0, <i32 31, i32 31, i32 31, i32 31> + %ext = sext <4 x i1> %cmp to <4 x i32> + store <4 x i32> %ext, ptr %res + ret void +} + +define void @v4i32_icmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4i32_icmp_ult: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vslt.wu $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x i32>, ptr %a0 + %v1 = load <4 x i32>, ptr %a1 + %cmp = icmp ult <4 x i32> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i32> + store <4 x i32> %ext, ptr %res + ret void +} + +define void @v2i64_icmp_ult_imm(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: v2i64_icmp_ult_imm: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vslti.du $vr0, $vr0, 31 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <2 x i64>, ptr %a0 + %cmp = icmp ult <2 x i64> %v0, <i64 31, i64 31> + %ext = sext <2 x i1> %cmp to <2 x i64> + store <2 x i64> %ext, ptr %res + ret void +} + +define void @v2i64_icmp_ult(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v2i64_icmp_ult: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +;
CHECK-NEXT: vslt.du $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <2 x i64>, ptr %a0 + %v1 = load <2 x i64>, ptr %a1 + %cmp = icmp ult <2 x i64> %v0, %v1 + %ext = sext <2 x i1> %cmp to <2 x i64> + store <2 x i64> %ext, ptr %res + ret void +} + +;; Expand SETNE +define void @v16i8_icmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v16i8_icmp_ne: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vseq.b $vr0, $vr1, $vr0 +; CHECK-NEXT: vxori.b $vr0, $vr0, 255 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <16 x i8>, ptr %a0 + %v1 = load <16 x i8>, ptr %a1 + %cmp = icmp ne <16 x i8> %v0, %v1 + %ext = sext <16 x i1> %cmp to <16 x i8> + store <16 x i8> %ext, ptr %res + ret void +} + +define void @v8i16_icmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v8i16_icmp_ne: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vseq.h $vr0, $vr1, $vr0 +; CHECK-NEXT: vrepli.b $vr1, -1 +; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x i16>, ptr %a0 + %v1 = load <8 x i16>, ptr %a1 + %cmp = icmp ne <8 x i16> %v0, %v1 + %ext = sext <8 x i1> %cmp to <8 x i16> + store <8 x i16> %ext, ptr %res + ret void +} + +define void @v4i32_icmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4i32_icmp_ne: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vseq.w $vr0, $vr1, $vr0 +; CHECK-NEXT: vrepli.b $vr1, -1 +; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x i32>, ptr %a0 + %v1 = load <4 x i32>, ptr %a1 + %cmp = icmp ne <4 x i32> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i32> + store <4 x i32> %ext, ptr %res + ret void +} + +define void @v2i64_icmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v2i64_icmp_ne: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vseq.d $vr0, $vr1, $vr0 +; CHECK-NEXT: vrepli.b $vr1, -1 +; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <2 x i64>, ptr %a0 + %v1 = load <2 x i64>, ptr %a1 + %cmp = icmp ne <2 x i64> %v0, %v1 + %ext = sext <2 x i1> %cmp to <2 x i64> + store <2 x i64> %ext, ptr %res + ret void +} + +;; Expand SETGE +define void @v16i8_icmp_sge(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v16i8_icmp_sge: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: vsle.b $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <16 x i8>, ptr %a0 + %v1 = load <16 x i8>, ptr %a1 + %cmp = icmp sge <16 x i8> %v0, %v1 + %ext = sext <16 x i1> %cmp to <16 x i8> + store <16 x i8> %ext, ptr %res + ret void +} + +define void @v8i16_icmp_sge(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v8i16_icmp_sge: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: vsle.h $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x i16>, ptr %a0 + %v1 = load <8 x i16>, ptr %a1 + %cmp = icmp sge <8 x i16> %v0, %v1 + %ext = sext <8 x i1> %cmp to <8 x i16> + store <8 x i16> %ext, ptr %res + ret void +} + +define void @v4i32_icmp_sge(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4i32_icmp_sge: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: vsle.w $vr0, 
$vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x i32>, ptr %a0 + %v1 = load <4 x i32>, ptr %a1 + %cmp = icmp sge <4 x i32> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i32> + store <4 x i32> %ext, ptr %res + ret void +} + +define void @v2i64_icmp_sge(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v2i64_icmp_sge: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: vsle.d $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <2 x i64>, ptr %a0 + %v1 = load <2 x i64>, ptr %a1 + %cmp = icmp sge <2 x i64> %v0, %v1 + %ext = sext <2 x i1> %cmp to <2 x i64> + store <2 x i64> %ext, ptr %res + ret void +} + +;; Expand SETUGE +define void @v16i8_icmp_uge(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v16i8_icmp_uge: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: vsle.bu $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <16 x i8>, ptr %a0 + %v1 = load <16 x i8>, ptr %a1 + %cmp = icmp uge <16 x i8> %v0, %v1 + %ext = sext <16 x i1> %cmp to <16 x i8> + store <16 x i8> %ext, ptr %res + ret void +} + +define void @v8i16_icmp_uge(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v8i16_icmp_uge: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: vsle.hu $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x i16>, ptr %a0 + %v1 = load <8 x i16>, ptr %a1 + %cmp = icmp uge <8 x i16> %v0, %v1 + %ext = sext <8 x i1> %cmp to <8 x i16> + store <8 x i16> %ext, ptr %res + ret void +} + +define void @v4i32_icmp_uge(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4i32_icmp_uge: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: vsle.wu $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x i32>, ptr %a0 + %v1 = load <4 x i32>, ptr %a1 + %cmp = icmp uge <4 x i32> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i32> + store <4 x i32> %ext, ptr %res + ret void +} + +define void @v2i64_icmp_uge(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v2i64_icmp_uge: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: vsle.du $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <2 x i64>, ptr %a0 + %v1 = load <2 x i64>, ptr %a1 + %cmp = icmp uge <2 x i64> %v0, %v1 + %ext = sext <2 x i1> %cmp to <2 x i64> + store <2 x i64> %ext, ptr %res + ret void +} + +;; Expand SETGT +define void @v16i8_icmp_sgt(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v16i8_icmp_sgt: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: vslt.b $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <16 x i8>, ptr %a0 + %v1 = load <16 x i8>, ptr %a1 + %cmp = icmp sgt <16 x i8> %v0, %v1 + %ext = sext <16 x i1> %cmp to <16 x i8> + store <16 x i8> %ext, ptr %res + ret void +} + +define void @v8i16_icmp_sgt(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v8i16_icmp_sgt: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: vslt.h $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x i16>, ptr %a0 + %v1 = load <8 x i16>, ptr %a1 + %cmp = icmp sgt <8 x i16> %v0, %v1 + %ext = sext <8 x i1> %cmp to <8 x i16> + store <8 x i16> %ext, ptr %res + ret void +} + +define void 
@v4i32_icmp_sgt(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4i32_icmp_sgt: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: vslt.w $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x i32>, ptr %a0 + %v1 = load <4 x i32>, ptr %a1 + %cmp = icmp sgt <4 x i32> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i32> + store <4 x i32> %ext, ptr %res + ret void +} + +define void @v2i64_icmp_sgt(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v2i64_icmp_sgt: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: vslt.d $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <2 x i64>, ptr %a0 + %v1 = load <2 x i64>, ptr %a1 + %cmp = icmp sgt <2 x i64> %v0, %v1 + %ext = sext <2 x i1> %cmp to <2 x i64> + store <2 x i64> %ext, ptr %res + ret void +} + +;; Expand SETUGT +define void @v16i8_icmp_ugt(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v16i8_icmp_ugt: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: vslt.bu $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <16 x i8>, ptr %a0 + %v1 = load <16 x i8>, ptr %a1 + %cmp = icmp ugt <16 x i8> %v0, %v1 + %ext = sext <16 x i1> %cmp to <16 x i8> + store <16 x i8> %ext, ptr %res + ret void +} + +define void @v8i16_icmp_ugt(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v8i16_icmp_ugt: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: vslt.hu $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x i16>, ptr %a0 + %v1 = load <8 x i16>, ptr %a1 + %cmp = icmp ugt <8 x i16> %v0, %v1 + %ext = sext <8 x i1> %cmp to <8 x i16> + store <8 x i16> %ext, ptr %res + ret void +} + +define void @v4i32_icmp_ugt(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v4i32_icmp_ugt: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: vslt.wu $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x i32>, ptr %a0 + %v1 = load <4 x i32>, ptr %a1 + %cmp = icmp ugt <4 x i32> %v0, %v1 + %ext = sext <4 x i1> %cmp to <4 x i32> + store <4 x i32> %ext, ptr %res + ret void +} + +define void @v2i64_icmp_ugt(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: v2i64_icmp_ugt: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: vslt.du $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <2 x i64>, ptr %a0 + %v1 = load <2 x i64>, ptr %a1 + %cmp = icmp ugt <2 x i64> %v0, %v1 + %ext = sext <2 x i1> %cmp to <2 x i64> + store <2 x i64> %ext, ptr %res + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/insertelement.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/insertelement.ll new file mode 100644 index 0000000000000000000000000000000000000000..a9834591aa0e8544c3269f8552df9367eb749767 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/insertelement.ll @@ -0,0 +1,196 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +define void @insert_16xi8(ptr %src, ptr %dst, i8 %ins) nounwind { +; CHECK-LABEL: insert_16xi8: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a0, 0 +; CHECK-NEXT: vinsgr2vr.b $vr0, $a2, 1 +; CHECK-NEXT: vst $vr0, $a1, 0 +; CHECK-NEXT: 
ret + %v = load volatile <16 x i8>, ptr %src + %v_new = insertelement <16 x i8> %v, i8 %ins, i32 1 + store <16 x i8> %v_new, ptr %dst + ret void +} + +define void @insert_8xi16(ptr %src, ptr %dst, i16 %ins) nounwind { +; CHECK-LABEL: insert_8xi16: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a0, 0 +; CHECK-NEXT: vinsgr2vr.h $vr0, $a2, 1 +; CHECK-NEXT: vst $vr0, $a1, 0 +; CHECK-NEXT: ret + %v = load volatile <8 x i16>, ptr %src + %v_new = insertelement <8 x i16> %v, i16 %ins, i32 1 + store <8 x i16> %v_new, ptr %dst + ret void +} + +define void @insert_4xi32(ptr %src, ptr %dst, i32 %ins) nounwind { +; CHECK-LABEL: insert_4xi32: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a0, 0 +; CHECK-NEXT: vinsgr2vr.w $vr0, $a2, 1 +; CHECK-NEXT: vst $vr0, $a1, 0 +; CHECK-NEXT: ret + %v = load volatile <4 x i32>, ptr %src + %v_new = insertelement <4 x i32> %v, i32 %ins, i32 1 + store <4 x i32> %v_new, ptr %dst + ret void +} + +define void @insert_2xi64(ptr %src, ptr %dst, i64 %ins) nounwind { +; CHECK-LABEL: insert_2xi64: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a0, 0 +; CHECK-NEXT: vinsgr2vr.d $vr0, $a2, 1 +; CHECK-NEXT: vst $vr0, $a1, 0 +; CHECK-NEXT: ret + %v = load volatile <2 x i64>, ptr %src + %v_new = insertelement <2 x i64> %v, i64 %ins, i32 1 + store <2 x i64> %v_new, ptr %dst + ret void +} + +define void @insert_4xfloat(ptr %src, ptr %dst, float %ins) nounwind { +; CHECK-LABEL: insert_4xfloat: +; CHECK: # %bb.0: +; CHECK-NEXT: movfr2gr.s $a2, $fa0 +; CHECK-NEXT: vld $vr0, $a0, 0 +; CHECK-NEXT: vinsgr2vr.w $vr0, $a2, 1 +; CHECK-NEXT: vst $vr0, $a1, 0 +; CHECK-NEXT: ret + %v = load volatile <4 x float>, ptr %src + %v_new = insertelement <4 x float> %v, float %ins, i32 1 + store <4 x float> %v_new, ptr %dst + ret void +} + +define void @insert_2xdouble(ptr %src, ptr %dst, double %ins) nounwind { +; CHECK-LABEL: insert_2xdouble: +; CHECK: # %bb.0: +; CHECK-NEXT: movfr2gr.d $a2, $fa0 +; CHECK-NEXT: vld $vr0, $a0, 0 +; CHECK-NEXT: vinsgr2vr.d $vr0, $a2, 1 +; CHECK-NEXT: vst $vr0, $a1, 0 +; CHECK-NEXT: ret + %v = load volatile <2 x double>, ptr %src + %v_new = insertelement <2 x double> %v, double %ins, i32 1 + store <2 x double> %v_new, ptr %dst + ret void +} + +define void @insert_16xi8_idx(ptr %src, ptr %dst, i8 %ins, i32 %idx) nounwind { +; CHECK-LABEL: insert_16xi8_idx: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.d $sp, $sp, -16 +; CHECK-NEXT: vld $vr0, $a0, 0 +; CHECK-NEXT: vst $vr0, $sp, 0 +; CHECK-NEXT: addi.d $a0, $sp, 0 +; CHECK-NEXT: bstrins.d $a0, $a3, 3, 0 +; CHECK-NEXT: st.b $a2, $a0, 0 +; CHECK-NEXT: vld $vr0, $sp, 0 +; CHECK-NEXT: vst $vr0, $a1, 0 +; CHECK-NEXT: addi.d $sp, $sp, 16 +; CHECK-NEXT: ret + %v = load volatile <16 x i8>, ptr %src + %v_new = insertelement <16 x i8> %v, i8 %ins, i32 %idx + store <16 x i8> %v_new, ptr %dst + ret void +} + +define void @insert_8xi16_idx(ptr %src, ptr %dst, i16 %ins, i32 %idx) nounwind { +; CHECK-LABEL: insert_8xi16_idx: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.d $sp, $sp, -16 +; CHECK-NEXT: vld $vr0, $a0, 0 +; CHECK-NEXT: vst $vr0, $sp, 0 +; CHECK-NEXT: addi.d $a0, $sp, 0 +; CHECK-NEXT: bstrins.d $a0, $a3, 3, 1 +; CHECK-NEXT: st.h $a2, $a0, 0 +; CHECK-NEXT: vld $vr0, $sp, 0 +; CHECK-NEXT: vst $vr0, $a1, 0 +; CHECK-NEXT: addi.d $sp, $sp, 16 +; CHECK-NEXT: ret + %v = load volatile <8 x i16>, ptr %src + %v_new = insertelement <8 x i16> %v, i16 %ins, i32 %idx + store <8 x i16> %v_new, ptr %dst + ret void +} + +define void @insert_4xi32_idx(ptr %src, ptr %dst, i32 %ins, i32 %idx) nounwind { +; CHECK-LABEL: insert_4xi32_idx: +; CHECK: # %bb.0: +; 
CHECK-NEXT: addi.d $sp, $sp, -16 +; CHECK-NEXT: vld $vr0, $a0, 0 +; CHECK-NEXT: vst $vr0, $sp, 0 +; CHECK-NEXT: addi.d $a0, $sp, 0 +; CHECK-NEXT: bstrins.d $a0, $a3, 3, 2 +; CHECK-NEXT: st.w $a2, $a0, 0 +; CHECK-NEXT: vld $vr0, $sp, 0 +; CHECK-NEXT: vst $vr0, $a1, 0 +; CHECK-NEXT: addi.d $sp, $sp, 16 +; CHECK-NEXT: ret + %v = load volatile <4 x i32>, ptr %src + %v_new = insertelement <4 x i32> %v, i32 %ins, i32 %idx + store <4 x i32> %v_new, ptr %dst + ret void +} + +define void @insert_2xi64_idx(ptr %src, ptr %dst, i64 %ins, i32 %idx) nounwind { +; CHECK-LABEL: insert_2xi64_idx: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.d $sp, $sp, -16 +; CHECK-NEXT: vld $vr0, $a0, 0 +; CHECK-NEXT: vst $vr0, $sp, 0 +; CHECK-NEXT: addi.d $a0, $sp, 0 +; CHECK-NEXT: bstrins.d $a0, $a3, 3, 3 +; CHECK-NEXT: st.d $a2, $a0, 0 +; CHECK-NEXT: vld $vr0, $sp, 0 +; CHECK-NEXT: vst $vr0, $a1, 0 +; CHECK-NEXT: addi.d $sp, $sp, 16 +; CHECK-NEXT: ret + %v = load volatile <2 x i64>, ptr %src + %v_new = insertelement <2 x i64> %v, i64 %ins, i32 %idx + store <2 x i64> %v_new, ptr %dst + ret void +} + +define void @insert_4xfloat_idx(ptr %src, ptr %dst, float %ins, i32 %idx) nounwind { +; CHECK-LABEL: insert_4xfloat_idx: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.d $sp, $sp, -16 +; CHECK-NEXT: vld $vr1, $a0, 0 +; CHECK-NEXT: vst $vr1, $sp, 0 +; CHECK-NEXT: addi.d $a0, $sp, 0 +; CHECK-NEXT: bstrins.d $a0, $a2, 3, 2 +; CHECK-NEXT: fst.s $fa0, $a0, 0 +; CHECK-NEXT: vld $vr0, $sp, 0 +; CHECK-NEXT: vst $vr0, $a1, 0 +; CHECK-NEXT: addi.d $sp, $sp, 16 +; CHECK-NEXT: ret + %v = load volatile <4 x float>, ptr %src + %v_new = insertelement <4 x float> %v, float %ins, i32 %idx + store <4 x float> %v_new, ptr %dst + ret void +} + +define void @insert_2xdouble_idx(ptr %src, ptr %dst, double %ins, i32 %idx) nounwind { +; CHECK-LABEL: insert_2xdouble_idx: +; CHECK: # %bb.0: +; CHECK-NEXT: addi.d $sp, $sp, -16 +; CHECK-NEXT: vld $vr1, $a0, 0 +; CHECK-NEXT: vst $vr1, $sp, 0 +; CHECK-NEXT: addi.d $a0, $sp, 0 +; CHECK-NEXT: bstrins.d $a0, $a2, 3, 3 +; CHECK-NEXT: fst.d $fa0, $a0, 0 +; CHECK-NEXT: vld $vr0, $sp, 0 +; CHECK-NEXT: vst $vr0, $a1, 0 +; CHECK-NEXT: addi.d $sp, $sp, 16 +; CHECK-NEXT: ret + %v = load volatile <2 x double>, ptr %src + %v_new = insertelement <2 x double> %v, double %ins, i32 %idx + store <2 x double> %v_new, ptr %dst + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/lshr.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/lshr.ll new file mode 100644 index 0000000000000000000000000000000000000000..dada52f93060e10d78ea9d1f524e2e548de55252 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/lshr.ll @@ -0,0 +1,178 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +define void @lshr_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: lshr_v16i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vsrl.b $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <16 x i8>, ptr %a0 + %v1 = load <16 x i8>, ptr %a1 + %v2 = lshr <16 x i8> %v0, %v1 + store <16 x i8> %v2, ptr %res + ret void +} + +define void @lshr_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: lshr_v8i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vsrl.h $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load 
<8 x i16>, ptr %a0 + %v1 = load <8 x i16>, ptr %a1 + %v2 = lshr <8 x i16> %v0, %v1 + store <8 x i16> %v2, ptr %res + ret void +} + +define void @lshr_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: lshr_v4i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vsrl.w $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <4 x i32>, ptr %a0 + %v1 = load <4 x i32>, ptr %a1 + %v2 = lshr <4 x i32> %v0, %v1 + store <4 x i32> %v2, ptr %res + ret void +} + +define void @lshr_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: lshr_v2i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vsrl.d $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <2 x i64>, ptr %a0 + %v1 = load <2 x i64>, ptr %a1 + %v2 = lshr <2 x i64> %v0, %v1 + store <2 x i64> %v2, ptr %res + ret void +} + +define void @lshr_v16i8_1(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: lshr_v16i8_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vsrli.b $vr0, $vr0, 1 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <16 x i8>, ptr %a0 + %v1 = lshr <16 x i8> %v0, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> + store <16 x i8> %v1, ptr %res + ret void +} + +define void @lshr_v16i8_7(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: lshr_v16i8_7: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vsrli.b $vr0, $vr0, 7 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <16 x i8>, ptr %a0 + %v1 = lshr <16 x i8> %v0, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7> + store <16 x i8> %v1, ptr %res + ret void +} + +define void @lshr_v8i16_1(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: lshr_v8i16_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vsrli.h $vr0, $vr0, 1 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <8 x i16>, ptr %a0 + %v1 = lshr <8 x i16> %v0, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> + store <8 x i16> %v1, ptr %res + ret void +} + +define void @lshr_v8i16_15(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: lshr_v8i16_15: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vsrli.h $vr0, $vr0, 15 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <8 x i16>, ptr %a0 + %v1 = lshr <8 x i16> %v0, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15> + store <8 x i16> %v1, ptr %res + ret void +} + +define void @lshr_v4i32_1(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: lshr_v4i32_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vsrli.w $vr0, $vr0, 1 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <4 x i32>, ptr %a0 + %v1 = lshr <4 x i32> %v0, <i32 1, i32 1, i32 1, i32 1> + store <4 x i32> %v1, ptr %res + ret void +} + +define void @lshr_v4i32_31(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: lshr_v4i32_31: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vsrli.w $vr0, $vr0, 31 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <4 x i32>, ptr %a0 + %v1 = lshr <4 x i32> %v0, <i32 31, i32 31, i32 31, i32 31> + store <4 x i32> %v1, ptr %res + ret void +} + +define void @lshr_v2i64_1(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: lshr_v2i64_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vsrli.d $vr0, $vr0, 1 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <2 x i64>, ptr %a0 + %v1 = lshr <2 x i64> %v0, <i64 1, i64 1> + store <2 x i64> %v1, ptr %res + ret void +} + +define void @lshr_v2i64_63(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: lshr_v2i64_63: +; CHECK: # %bb.0:
# %entry +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vsrli.d $vr0, $vr0, 63 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <2 x i64>, ptr %a0 + %v1 = lshr <2 x i64> %v0, <i64 63, i64 63> + store <2 x i64> %v1, ptr %res + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/mul.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/mul.ll new file mode 100644 index 0000000000000000000000000000000000000000..d0be9cb7e3c8bdcc07626d86897161d98e5eabaa --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/mul.ll @@ -0,0 +1,238 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +define void @mul_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: mul_v16i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vmul.b $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <16 x i8>, ptr %a0 + %v1 = load <16 x i8>, ptr %a1 + %v2 = mul <16 x i8> %v0, %v1 + store <16 x i8> %v2, ptr %res + ret void +} + +define void @mul_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: mul_v8i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vmul.h $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <8 x i16>, ptr %a0 + %v1 = load <8 x i16>, ptr %a1 + %v2 = mul <8 x i16> %v0, %v1 + store <8 x i16> %v2, ptr %res + ret void +} + +define void @mul_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: mul_v4i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vmul.w $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <4 x i32>, ptr %a0 + %v1 = load <4 x i32>, ptr %a1 + %v2 = mul <4 x i32> %v0, %v1 + store <4 x i32> %v2, ptr %res + ret void +} + +define void @mul_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: mul_v2i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vmul.d $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <2 x i64>, ptr %a0 + %v1 = load <2 x i64>, ptr %a1 + %v2 = mul <2 x i64> %v0, %v1 + store <2 x i64> %v2, ptr %res + ret void +} + +define void @mul_square_v16i8(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: mul_square_v16i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vmul.b $vr0, $vr0, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <16 x i8>, ptr %a0 + %v1 = mul <16 x i8> %v0, %v0 + store <16 x i8> %v1, ptr %res + ret void +} + +define void @mul_square_v8i16(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: mul_square_v8i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vmul.h $vr0, $vr0, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <8 x i16>, ptr %a0 + %v1 = mul <8 x i16> %v0, %v0 + store <8 x i16> %v1, ptr %res + ret void +} + +define void @mul_square_v4i32(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: mul_square_v4i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vmul.w $vr0, $vr0, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <4 x i32>, ptr %a0 + %v1 = mul <4 x i32> %v0, %v0 + store <4 x i32> %v1, ptr %res + ret void +} + +define void
@mul_square_v2i64(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: mul_square_v2i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vmul.d $vr0, $vr0, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <2 x i64>, ptr %a0 + %v1 = mul <2 x i64> %v0, %v0 + store <2 x i64> %v1, ptr %res + ret void +} + +define void @mul_v16i8_8(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: mul_v16i8_8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vslli.b $vr0, $vr0, 3 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <16 x i8>, ptr %a0 + %v1 = mul <16 x i8> %v0, <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8> + store <16 x i8> %v1, ptr %res + ret void +} + +define void @mul_v8i16_8(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: mul_v8i16_8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vslli.h $vr0, $vr0, 3 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <8 x i16>, ptr %a0 + %v1 = mul <8 x i16> %v0, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> + store <8 x i16> %v1, ptr %res + ret void +} + +define void @mul_v4i32_8(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: mul_v4i32_8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vslli.w $vr0, $vr0, 3 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <4 x i32>, ptr %a0 + %v1 = mul <4 x i32> %v0, <i32 8, i32 8, i32 8, i32 8> + store <4 x i32> %v1, ptr %res + ret void +} + +define void @mul_v2i64_8(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: mul_v2i64_8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vslli.d $vr0, $vr0, 3 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <2 x i64>, ptr %a0 + %v1 = mul <2 x i64> %v0, <i64 8, i64 8> + store <2 x i64> %v1, ptr %res + ret void +} + +define void @mul_v16i8_17(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: mul_v16i8_17: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vrepli.b $vr1, 17 +; CHECK-NEXT: vmul.b $vr0, $vr0, $vr1 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <16 x i8>, ptr %a0 + %v1 = mul <16 x i8> %v0, <i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17> + store <16 x i8> %v1, ptr %res + ret void +} + +define void @mul_v8i16_17(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: mul_v8i16_17: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vrepli.h $vr1, 17 +; CHECK-NEXT: vmul.h $vr0, $vr0, $vr1 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <8 x i16>, ptr %a0 + %v1 = mul <8 x i16> %v0, <i16 17, i16 17, i16 17, i16 17, i16 17, i16 17, i16 17, i16 17> + store <8 x i16> %v1, ptr %res + ret void +} + +define void @mul_v4i32_17(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: mul_v4i32_17: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vrepli.w $vr1, 17 +; CHECK-NEXT: vmul.w $vr0, $vr0, $vr1 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <4 x i32>, ptr %a0 + %v1 = mul <4 x i32> %v0, <i32 17, i32 17, i32 17, i32 17> + store <4 x i32> %v1, ptr %res + ret void +} + +define void @mul_v2i64_17(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: mul_v2i64_17: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vrepli.d $vr1, 17 +; CHECK-NEXT: vmul.d $vr0, $vr0, $vr1 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <2 x i64>, ptr %a0 + %v1 = mul <2 x i64> %v0, <i64 17, i64 17> + store <2 x i64> %v1, ptr %res + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/or.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/or.ll new file mode 100644 index 0000000000000000000000000000000000000000..f124512acce73deac3fee2138b51b3db3b2bb822 --- /dev/null +++
b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/or.ll @@ -0,0 +1,125 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +define void @or_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: or_v16i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vor.v $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <16 x i8>, ptr %a0 + %v1 = load <16 x i8>, ptr %a1 + %v2 = or <16 x i8> %v0, %v1 + store <16 x i8> %v2, ptr %res + ret void +} + +define void @or_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: or_v8i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vor.v $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <8 x i16>, ptr %a0 + %v1 = load <8 x i16>, ptr %a1 + %v2 = or <8 x i16> %v0, %v1 + store <8 x i16> %v2, ptr %res + ret void +} + +define void @or_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: or_v4i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vor.v $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <4 x i32>, ptr %a0 + %v1 = load <4 x i32>, ptr %a1 + %v2 = or <4 x i32> %v0, %v1 + store <4 x i32> %v2, ptr %res + ret void +} + +define void @or_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: or_v2i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vor.v $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <2 x i64>, ptr %a0 + %v1 = load <2 x i64>, ptr %a1 + %v2 = or <2 x i64> %v0, %v1 + store <2 x i64> %v2, ptr %res + ret void +} + +define void @or_u_v16i8(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: or_u_v16i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vori.b $vr0, $vr0, 31 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <16 x i8>, ptr %a0 + %v1 = or <16 x i8> %v0, <i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31> + store <16 x i8> %v1, ptr %res + ret void +} + +define void @or_u_v8i16(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: or_u_v8i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vrepli.h $vr1, 31 +; CHECK-NEXT: vor.v $vr0, $vr0, $vr1 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <8 x i16>, ptr %a0 + %v1 = or <8 x i16> %v0, <i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31> + store <8 x i16> %v1, ptr %res + ret void +} + +define void @or_u_v4i32(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: or_u_v4i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vrepli.w $vr1, 31 +; CHECK-NEXT: vor.v $vr0, $vr0, $vr1 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <4 x i32>, ptr %a0 + %v1 = or <4 x i32> %v0, <i32 31, i32 31, i32 31, i32 31> + store <4 x i32> %v1, ptr %res + ret void +} + +define void @or_u_v2i64(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: or_u_v2i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vrepli.d $vr1, 31 +; CHECK-NEXT: vor.v $vr0, $vr0, $vr1 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <2 x i64>, ptr %a0 + %v1 = or <2 x i64> %v0, <i64 31, i64 31> + store <2 x i64> %v1, ptr %res + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sdiv.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sdiv.ll new file mode 100644 index
0000000000000000000000000000000000000000..b68f73a749135d6a46f73e49ac9cb5e3d5ea1971 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sdiv.ll @@ -0,0 +1,134 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +define void @sdiv_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: sdiv_v16i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vdiv.b $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <16 x i8>, ptr %a0 + %v1 = load <16 x i8>, ptr %a1 + %v2 = sdiv <16 x i8> %v0, %v1 + store <16 x i8> %v2, ptr %res + ret void +} + +define void @sdiv_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: sdiv_v8i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vdiv.h $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <8 x i16>, ptr %a0 + %v1 = load <8 x i16>, ptr %a1 + %v2 = sdiv <8 x i16> %v0, %v1 + store <8 x i16> %v2, ptr %res + ret void +} + +define void @sdiv_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: sdiv_v4i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vdiv.w $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <4 x i32>, ptr %a0 + %v1 = load <4 x i32>, ptr %a1 + %v2 = sdiv <4 x i32> %v0, %v1 + store <4 x i32> %v2, ptr %res + ret void +} + +define void @sdiv_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: sdiv_v2i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vdiv.d $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <2 x i64>, ptr %a0 + %v1 = load <2 x i64>, ptr %a1 + %v2 = sdiv <2 x i64> %v0, %v1 + store <2 x i64> %v2, ptr %res + ret void +} + +define void @sdiv_v16i8_8(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: sdiv_v16i8_8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vsrai.b $vr1, $vr0, 7 +; CHECK-NEXT: vsrli.b $vr1, $vr1, 5 +; CHECK-NEXT: vadd.b $vr0, $vr0, $vr1 +; CHECK-NEXT: vsrai.b $vr0, $vr0, 3 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <16 x i8>, ptr %a0 + %v1 = sdiv <16 x i8> %v0, <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8> + store <16 x i8> %v1, ptr %res + ret void +} + +define void @sdiv_v8i16_8(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: sdiv_v8i16_8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vsrai.h $vr1, $vr0, 15 +; CHECK-NEXT: vsrli.h $vr1, $vr1, 13 +; CHECK-NEXT: vadd.h $vr0, $vr0, $vr1 +; CHECK-NEXT: vsrai.h $vr0, $vr0, 3 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <8 x i16>, ptr %a0 + %v1 = sdiv <8 x i16> %v0, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> + store <8 x i16> %v1, ptr %res + ret void +} + +define void @sdiv_v4i32_8(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: sdiv_v4i32_8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vsrai.w $vr1, $vr0, 31 +; CHECK-NEXT: vsrli.w $vr1, $vr1, 29 +; CHECK-NEXT: vadd.w $vr0, $vr0, $vr1 +; CHECK-NEXT: vsrai.w $vr0, $vr0, 3 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <4 x i32>, ptr %a0 + %v1 = sdiv <4 x i32> %v0, <i32 8, i32 8, i32 8, i32 8> + store <4 x i32> %v1, ptr %res + ret void +} + +define void @sdiv_v2i64_8(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: sdiv_v2i64_8: +; CHECK: # %bb.0: #
%entry +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vsrai.d $vr1, $vr0, 63 +; CHECK-NEXT: vsrli.d $vr1, $vr1, 61 +; CHECK-NEXT: vadd.d $vr0, $vr0, $vr1 +; CHECK-NEXT: vsrai.d $vr0, $vr0, 3 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <2 x i64>, ptr %a0 + %v1 = sdiv <2 x i64> %v0, <i64 8, i64 8> + store <2 x i64> %v1, ptr %res + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shl.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shl.ll new file mode 100644 index 0000000000000000000000000000000000000000..fa0aebaf28b3c5eef7c2cff40ae0e824ae66180b --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shl.ll @@ -0,0 +1,178 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +define void @shl_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: shl_v16i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vsll.b $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <16 x i8>, ptr %a0 + %v1 = load <16 x i8>, ptr %a1 + %v2 = shl <16 x i8> %v0, %v1 + store <16 x i8> %v2, ptr %res + ret void +} + +define void @shl_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: shl_v8i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vsll.h $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <8 x i16>, ptr %a0 + %v1 = load <8 x i16>, ptr %a1 + %v2 = shl <8 x i16> %v0, %v1 + store <8 x i16> %v2, ptr %res + ret void +} + +define void @shl_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: shl_v4i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vsll.w $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <4 x i32>, ptr %a0 + %v1 = load <4 x i32>, ptr %a1 + %v2 = shl <4 x i32> %v0, %v1 + store <4 x i32> %v2, ptr %res + ret void +} + +define void @shl_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: shl_v2i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a2, 0 +; CHECK-NEXT: vld $vr1, $a1, 0 +; CHECK-NEXT: vsll.d $vr0, $vr1, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <2 x i64>, ptr %a0 + %v1 = load <2 x i64>, ptr %a1 + %v2 = shl <2 x i64> %v0, %v1 + store <2 x i64> %v2, ptr %res + ret void +} + +define void @shl_v16i8_1(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: shl_v16i8_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vslli.b $vr0, $vr0, 1 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <16 x i8>, ptr %a0 + %v1 = shl <16 x i8> %v0, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> + store <16 x i8> %v1, ptr %res + ret void +} + +define void @shl_v16i8_7(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: shl_v16i8_7: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vslli.b $vr0, $vr0, 7 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <16 x i8>, ptr %a0 + %v1 = shl <16 x i8> %v0, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7> + store <16 x i8> %v1, ptr %res + ret void +} + +define void @shl_v8i16_1(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: shl_v8i16_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vslli.h $vr0, $vr0, 1 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <8 x i16>, ptr %a0 + %v1 = shl <8 x i16> %v0, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> + store <8 x
i16> %v1, ptr %res + ret void +} + +define void @shl_v8i16_15(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: shl_v8i16_15: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vslli.h $vr0, $vr0, 15 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <8 x i16>, ptr %a0 + %v1 = shl <8 x i16> %v0, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15> + store <8 x i16> %v1, ptr %res + ret void +} + +define void @shl_v4i32_1(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: shl_v4i32_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vslli.w $vr0, $vr0, 1 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <4 x i32>, ptr %a0 + %v1 = shl <4 x i32> %v0, <i32 1, i32 1, i32 1, i32 1> + store <4 x i32> %v1, ptr %res + ret void +} + +define void @shl_v4i32_31(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: shl_v4i32_31: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vslli.w $vr0, $vr0, 31 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <4 x i32>, ptr %a0 + %v1 = shl <4 x i32> %v0, <i32 31, i32 31, i32 31, i32 31> + store <4 x i32> %v1, ptr %res + ret void +} + +define void @shl_v2i64_1(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: shl_v2i64_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vslli.d $vr0, $vr0, 1 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <2 x i64>, ptr %a0 + %v1 = shl <2 x i64> %v0, <i64 1, i64 1> + store <2 x i64> %v1, ptr %res + ret void +} + +define void @shl_v2i64_63(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: shl_v2i64_63: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vslli.d $vr0, $vr0, 63 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret +entry: + %v0 = load <2 x i64>, ptr %a0 + %v1 = shl <2 x i64> %v0, <i64 63, i64 63> + store <2 x i64> %v1, ptr %res + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vilv.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vilv.ll new file mode 100644 index 0000000000000000000000000000000000000000..31398c6081c0a9357ae3c700dc7ded86c1102355 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vilv.ll @@ -0,0 +1,82 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc --mtriple=loongarch64 --mattr=+lsx %s -o - | FileCheck %s + +;; vilvl.b +define <16 x i8> @shufflevector_vilvl_v16i8(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: shufflevector_vilvl_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vilvl.b $vr0, $vr1, $vr0 +; CHECK-NEXT: ret + %c = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23> + ret <16 x i8> %c +} + +;; vilvl.h +define <8 x i16> @shufflevector_vilvl_v8i16(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: shufflevector_vilvl_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vilvl.h $vr0, $vr1, $vr0 +; CHECK-NEXT: ret + %c = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11> + ret <8 x i16> %c +} + +;; vilvl.w +define <4 x i32> @shufflevector_vilvl_v4i32(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: shufflevector_vilvl_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vilvl.w $vr0, $vr1, $vr0 +; CHECK-NEXT: ret + %c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5> + ret <4 x i32> %c +} + +;; vilvl.w +define <4 x float> @shufflevector_vilvl_v4f32(<4 x float> %a, <4 x float> %b) { +; CHECK-LABEL: shufflevector_vilvl_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vilvl.w $vr0, $vr1, $vr0 +; CHECK-NEXT: ret + %c = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5> + ret <4 x float> %c +} + +;; vilvh.b +define <16 x i8> @shufflevector_vilvh_v16i8(<16 x
i8> %a, <16 x i8> %b) { +; CHECK-LABEL: shufflevector_vilvh_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vilvh.b $vr0, $vr1, $vr0 +; CHECK-NEXT: ret + %c = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31> + ret <16 x i8> %c +} + +;; vilvh.h +define <8 x i16> @shufflevector_vilvh_v8i16(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: shufflevector_vilvh_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vilvh.h $vr0, $vr1, $vr0 +; CHECK-NEXT: ret + %c = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> + ret <8 x i16> %c +} + +;; vilvh.w +define <4 x i32> @shufflevector_vilvh_v4i32(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: shufflevector_vilvh_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vilvh.w $vr0, $vr1, $vr0 +; CHECK-NEXT: ret + %c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7> + ret <4 x i32> %c +} + +;; vilvh.w +define <4 x float> @shufflevector_vilvh_v4f32(<4 x float> %a, <4 x float> %b) { +; CHECK-LABEL: shufflevector_vilvh_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vilvh.w $vr0, $vr1, $vr0 +; CHECK-NEXT: ret + %c = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7> + ret <4 x float> %c +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vpack.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vpack.ll new file mode 100644 index 0000000000000000000000000000000000000000..171e68306cd11026bf5b422870136fcd7e0b5e81 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vpack.ll @@ -0,0 +1,122 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc --mtriple=loongarch64 --mattr=+lsx %s -o - | FileCheck %s + +;; vpackev.b +define <16 x i8> @shufflevector_pack_ev_v16i8(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: shufflevector_pack_ev_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vpackev.b $vr0, $vr1, $vr0 +; CHECK-NEXT: ret + %c = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30> + ret <16 x i8> %c +} + +;; vpackev.h +define <8 x i16> @shufflevector_pack_ev_v8i16(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: shufflevector_pack_ev_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vpackev.h $vr0, $vr1, $vr0 +; CHECK-NEXT: ret + %c = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> + ret <8 x i16> %c +} + +;; vpackev.w +define <4 x i32> @shufflevector_pack_ev_v4i32(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: shufflevector_pack_ev_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vpackev.w $vr0, $vr1, $vr0 +; CHECK-NEXT: ret + %c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6> + ret <4 x i32> %c +} + +;; vpickev.d/vpackev.d/vilvl.d +define <2 x i64> @shufflevector_pack_ev_v2i64(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: shufflevector_pack_ev_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vpackev.d $vr0, $vr1, $vr0 +; CHECK-NEXT: ret + %c = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2> + ret <2 x i64> %c +} + +;; vpackev.w +define <4 x float> @shufflevector_pack_ev_v4f32(<4 x float> %a, <4 x float> %b) { +; CHECK-LABEL: shufflevector_pack_ev_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vpackev.w $vr0, $vr1, $vr0 +; CHECK-NEXT: ret + %c = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6> + ret <4 x float> %c +} + +;; vpickev.d/vpackev.d/vilvl.d +define <2 x double> @shufflevector_pack_ev_v2f64(<2 x double> %a, <2 x double> %b) { +; CHECK-LABEL: shufflevector_pack_ev_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vpackev.d $vr0, $vr1, $vr0 +; CHECK-NEXT: ret + %c = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 2> + ret <2 x double> %c +} + +;; vpackod.b +define <16 x i8>
+;; vpackod.b
+define <16 x i8> @shufflevector_pack_od_v16i8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: shufflevector_pack_od_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vpackod.b $vr0, $vr1, $vr0
+; CHECK-NEXT: ret
+ %c = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
+ ret <16 x i8> %c
+}
+
+;; vpackod.h
+define <8 x i16> @shufflevector_pack_od_v8i16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: shufflevector_pack_od_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vpackod.h $vr0, $vr1, $vr0
+; CHECK-NEXT: ret
+ %c = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+ ret <8 x i16> %c
+}
+
+;; vpackod.w
+define <4 x i32> @shufflevector_pack_od_v4i32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: shufflevector_pack_od_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vpackod.w $vr0, $vr1, $vr0
+; CHECK-NEXT: ret
+ %c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+ ret <4 x i32> %c
+}
+
+;; vpickod.d/vpackod.d/vilvh.d
+define <2 x i64> @shufflevector_pack_od_v2i64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: shufflevector_pack_od_v2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vpackod.d $vr0, $vr1, $vr0
+; CHECK-NEXT: ret
+ %c = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
+ ret <2 x i64> %c
+}
+
+;; vpackod.w
+define <4 x float> @shufflevector_pack_od_v4f32(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: shufflevector_pack_od_v4f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vpackod.w $vr0, $vr1, $vr0
+; CHECK-NEXT: ret
+ %c = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+ ret <4 x float> %c
+}
+
+;; vpickod.d/vpackod.d/vilvh.d
+define <2 x double> @shufflevector_pack_od_v2f64(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: shufflevector_pack_od_v2f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vpackod.d $vr0, $vr1, $vr0
+; CHECK-NEXT: ret
+ %c = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 3>
+ ret <2 x double> %c
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vpick.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vpick.ll
new file mode 100644
index 0000000000000000000000000000000000000000..ca636d942b583814f7bae973dd03500690b9f719
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vpick.ll
@@ -0,0 +1,82 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx %s -o - | FileCheck %s
+
+;; vpickev.b
+define <16 x i8> @shufflevector_pick_ev_v16i8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: shufflevector_pick_ev_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vpickev.b $vr0, $vr1, $vr0
+; CHECK-NEXT: ret
+ %c = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
+ ret <16 x i8> %c
+}
+
+;; vpickev.h
+define <8 x i16> @shufflevector_pick_ev_v8i16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: shufflevector_pick_ev_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vpickev.h $vr0, $vr1, $vr0
+; CHECK-NEXT: ret
+ %c = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+ ret <8 x i16> %c
+}
+
+;; vpickev.w
+define <4 x i32> @shufflevector_pick_ev_v4i32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: shufflevector_pick_ev_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vpickev.w $vr0, $vr1, $vr0
+; CHECK-NEXT: ret
+ %c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+ ret <4 x i32> %c
+}
+
+;; vpickev.w
+define <4 x float> @shufflevector_pick_ev_v4f32(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: shufflevector_pick_ev_v4f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vpickev.w $vr0, $vr1, $vr0
+; CHECK-NEXT: ret
+ %c = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+ ret <4 x float> %c
+}
+
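+;; vpickev concatenates the even lanes of each source: %a's even lanes fill
+;; the low half of the result and %b's even lanes the high half, e.g. the
+;; v4i32 mask <i32 0, i32 2, i32 4, i32 6>. The vpickod tests below pick the
+;; odd lanes instead.
+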
+;; vpickod.b
+define <16 x i8> @shufflevector_pick_od_v16i8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: shufflevector_pick_od_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vpickod.b $vr0, $vr1, $vr0
+; CHECK-NEXT: ret
+ %c = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
+ ret <16 x i8> %c
+}
+
+;; vpickod.h
+define <8 x i16> @shufflevector_pick_od_v8i16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: shufflevector_pick_od_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vpickod.h $vr0, $vr1, $vr0
+; CHECK-NEXT: ret
+ %c = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+ ret <8 x i16> %c
+}
+
+;; vpickod.w
+define <4 x i32> @shufflevector_pick_od_v4i32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: shufflevector_pick_od_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vpickod.w $vr0, $vr1, $vr0
+; CHECK-NEXT: ret
+ %c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+ ret <4 x i32> %c
+}
+
+;; vpickod.w
+define <4 x float> @shufflevector_pick_od_v4f32(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: shufflevector_pick_od_v4f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vpickod.w $vr0, $vr1, $vr0
+; CHECK-NEXT: ret
+ %c = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+ ret <4 x float> %c
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vreplvei.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vreplvei.ll
new file mode 100644
index 0000000000000000000000000000000000000000..10510786f3216287237c598ea8042ca853b6a06a
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vreplvei.ll
@@ -0,0 +1,62 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx %s -o - | FileCheck %s
+
+;; vreplvei.b
+define <16 x i8> @shufflevector_v16i8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: shufflevector_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vreplvei.b $vr0, $vr0, 1
+; CHECK-NEXT: ret
+ %c = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+ ret <16 x i8> %c
+}
+
+;; vreplvei.h
+define <8 x i16> @shufflevector_v8i16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: shufflevector_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vreplvei.h $vr0, $vr1, 2
+; CHECK-NEXT: ret
+ %c = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10>
+ ret <8 x i16> %c
+}
+
+;; vreplvei.w
+define <4 x i32> @shufflevector_v4i32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: shufflevector_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vreplvei.w $vr0, $vr0, 0
+; CHECK-NEXT: ret
+ %c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+ ret <4 x i32> %c
+}
+
+;; vreplvei.d
+define <2 x i64> @shufflevector_v2i64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: shufflevector_v2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vreplvei.d $vr0, $vr0, 1
+; CHECK-NEXT: ret
+ %c = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 1>
+ ret <2 x i64> %c
+}
+
+;; vreplvei.w
+define <4 x float> @shufflevector_v4f32(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: shufflevector_v4f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vreplvei.w $vr0, $vr0, 0
+; CHECK-NEXT: ret
+ %c = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
+ ret <4 x float> %c
+}
+
+;; vreplvei.d
+define <2 x double> @shufflevector_v2f64(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: shufflevector_v2f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vreplvei.d $vr0, $vr0, 1
+; CHECK-NEXT: ret
+ %c = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 1>
+ ret <2 x double> %c
+}
diff --git
a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vshuf.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vshuf.ll new file mode 100644 index 0000000000000000000000000000000000000000..55800b31446b3d82a5f7efab3483a8f9d2ebb481 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vshuf.ll @@ -0,0 +1,84 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc --mtriple=loongarch64 --mattr=+lsx %s -o - | FileCheck %s + +define <16 x i8> @shufflevector_v16i8(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: shufflevector_v16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI0_0) +; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI0_0) +; CHECK-NEXT: vld $vr2, $a0, 0 +; CHECK-NEXT: vshuf.b $vr0, $vr1, $vr0, $vr2 +; CHECK-NEXT: ret + %c = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + ret <16 x i8> %c +} + +;; vshuf.h +define <8 x i16> @shufflevector_v8i16(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: shufflevector_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI1_0) +; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI1_0) +; CHECK-NEXT: vld $vr2, $a0, 0 +; CHECK-NEXT: vshuf.h $vr2, $vr1, $vr0 +; CHECK-NEXT: vori.b $vr0, $vr2, 0 +; CHECK-NEXT: ret + %c = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> + ret <8 x i16> %c +} + +;; vshuf.w +define <4 x i32> @shufflevector_v4i32(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: shufflevector_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI2_0) +; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI2_0) +; CHECK-NEXT: vld $vr2, $a0, 0 +; CHECK-NEXT: vshuf.w $vr2, $vr1, $vr0 +; CHECK-NEXT: vori.b $vr0, $vr2, 0 +; CHECK-NEXT: ret + %c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %c +} + +;; vshuf.d +define <2 x i64> @shufflevector_v2i64(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: shufflevector_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI3_0) +; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI3_0) +; CHECK-NEXT: vld $vr2, $a0, 0 +; CHECK-NEXT: vshuf.d $vr2, $vr1, $vr0 +; CHECK-NEXT: vori.b $vr0, $vr2, 0 +; CHECK-NEXT: ret + %c = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> + ret <2 x i64> %c +} + +;; vshuf.w +define <4 x float> @shufflevector_v4f32(<4 x float> %a, <4 x float> %b) { +; CHECK-LABEL: shufflevector_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI4_0) +; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI4_0) +; CHECK-NEXT: vld $vr2, $a0, 0 +; CHECK-NEXT: vshuf.w $vr2, $vr1, $vr0 +; CHECK-NEXT: vori.b $vr0, $vr2, 0 +; CHECK-NEXT: ret + %c = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + ret <4 x float> %c +} + +;; vshuf.d +define <2 x double> @shufflevector_v2f64(<2 x double> %a, <2 x double> %b) { +; CHECK-LABEL: shufflevector_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI5_0) +; CHECK-NEXT: addi.d $a0, $a0, %pc_lo12(.LCPI5_0) +; CHECK-NEXT: vld $vr2, $a0, 0 +; CHECK-NEXT: vshuf.d $vr2, $vr1, $vr0 +; CHECK-NEXT: vori.b $vr0, $vr2, 0 +; CHECK-NEXT: ret + %c = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> + ret <2 x double> %c +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vshuf4i.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vshuf4i.ll new file mode 100644 index 0000000000000000000000000000000000000000..660b9581c3d1f555b358afd67e2bf664c454355b --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/shuffle-as-vshuf4i.ll @@ -0,0 
+1,42 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx %s -o - | FileCheck %s
+
+;; vshuf4i.b
+define <16 x i8> @shufflevector_vshuf4i_v16i8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: shufflevector_vshuf4i_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vshuf4i.b $vr0, $vr0, 27
+; CHECK-NEXT: ret
+ %c = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
+ ret <16 x i8> %c
+}
+
+;; vshuf4i.h
+define <8 x i16> @shufflevector_vshuf4i_v8i16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: shufflevector_vshuf4i_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vshuf4i.h $vr0, $vr0, 27
+; CHECK-NEXT: ret
+ %c = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
+ ret <8 x i16> %c
+}
+
+;; vshuf4i.w
+define <4 x i32> @shufflevector_vshuf4i_v4i32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: shufflevector_vshuf4i_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vshuf4i.w $vr0, $vr0, 27
+; CHECK-NEXT: ret
+ %c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+ ret <4 x i32> %c
+}
+
+;; vshuf4i.w
+define <4 x float> @shufflevector_vshuf4i_v4f32(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: shufflevector_vshuf4i_v4f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vshuf4i.w $vr0, $vr0, 27
+; CHECK-NEXT: ret
+ %c = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+ ret <4 x float> %c
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sitofp.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sitofp.ll
new file mode 100644
index 0000000000000000000000000000000000000000..1e820a37a2409eae7187ddd97e4b5b021ac51365
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sitofp.ll
@@ -0,0 +1,28 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+define void @sitofp_v4i32_v4f32(ptr %res, ptr %in){
+; CHECK-LABEL: sitofp_v4i32_v4f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vffint.s.w $vr0, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <4 x i32>, ptr %in
+ %v1 = sitofp <4 x i32> %v0 to <4 x float>
+ store <4 x float> %v1, ptr %res
+ ret void
+}
+
+define void @sitofp_v2i64_v2f64(ptr %res, ptr %in){
+; CHECK-LABEL: sitofp_v2i64_v2f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vffint.d.l $vr0, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <2 x i64>, ptr %in
+ %v1 = sitofp <2 x i64> %v0 to <2 x double>
+ store <2 x double> %v1, ptr %res
+ ret void
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sub.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sub.ll
new file mode 100644
index 0000000000000000000000000000000000000000..25b4623a47d1fc15b1361a4ba843fc2c45349de3
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/sub.ll
@@ -0,0 +1,122 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+define void @sub_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: sub_v16i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a2, 0
+; CHECK-NEXT: vld $vr1, $a1, 0
+; CHECK-NEXT: vsub.b $vr0, $vr1, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <16 x i8>, ptr %a0
+ %v1 = load <16 x i8>, ptr %a1
+ %v2 = sub <16 x i8> %v0, %v1
+ store <16 x i8> %v2, ptr %res
+ ret void
+}
+
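+;; A sub by a splat immediate in the range [0, 31] is expected to fold the
+;; constant into vsubi.{bu,hu,wu,du}, e.g. sub %v0, splat(31) becomes
+;; vsubi.bu $vr0, $vr0, 31, as the *_31 cases below check.
+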
+define void @sub_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: sub_v8i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a2, 0
+; CHECK-NEXT: vld $vr1, $a1, 0
+; CHECK-NEXT: vsub.h $vr0, $vr1, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <8 x i16>, ptr %a0
+ %v1 = load <8 x i16>, ptr %a1
+ %v2 = sub <8 x i16> %v0, %v1
+ store <8 x i16> %v2, ptr %res
+ ret void
+}
+
+define void @sub_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: sub_v4i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a2, 0
+; CHECK-NEXT: vld $vr1, $a1, 0
+; CHECK-NEXT: vsub.w $vr0, $vr1, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <4 x i32>, ptr %a0
+ %v1 = load <4 x i32>, ptr %a1
+ %v2 = sub <4 x i32> %v0, %v1
+ store <4 x i32> %v2, ptr %res
+ ret void
+}
+
+define void @sub_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: sub_v2i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a2, 0
+; CHECK-NEXT: vld $vr1, $a1, 0
+; CHECK-NEXT: vsub.d $vr0, $vr1, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <2 x i64>, ptr %a0
+ %v1 = load <2 x i64>, ptr %a1
+ %v2 = sub <2 x i64> %v0, %v1
+ store <2 x i64> %v2, ptr %res
+ ret void
+}
+
+define void @sub_v16i8_31(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: sub_v16i8_31:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vsubi.bu $vr0, $vr0, 31
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <16 x i8>, ptr %a0
+ %v1 = sub <16 x i8> %v0, <i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31>
+ store <16 x i8> %v1, ptr %res
+ ret void
+}
+
+define void @sub_v8i16_31(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: sub_v8i16_31:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vsubi.hu $vr0, $vr0, 31
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <8 x i16>, ptr %a0
+ %v1 = sub <8 x i16> %v0, <i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31>
+ store <8 x i16> %v1, ptr %res
+ ret void
+}
+
+define void @sub_v4i32_31(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: sub_v4i32_31:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vsubi.wu $vr0, $vr0, 31
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <4 x i32>, ptr %a0
+ %v1 = sub <4 x i32> %v0, <i32 31, i32 31, i32 31, i32 31>
+ store <4 x i32> %v1, ptr %res
+ ret void
+}
+
+define void @sub_v2i64_31(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: sub_v2i64_31:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vsubi.du $vr0, $vr0, 31
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <2 x i64>, ptr %a0
+ %v1 = sub <2 x i64> %v0, <i64 31, i64 31>
+ store <2 x i64> %v1, ptr %res
+ ret void
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/udiv.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/udiv.ll
new file mode 100644
index 0000000000000000000000000000000000000000..abb60b91dd488f548b8b23e46063b5e6cad4931e
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/udiv.ll
@@ -0,0 +1,122 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+define void @udiv_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: udiv_v16i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a2, 0
+; CHECK-NEXT: vld $vr1, $a1, 0
+; CHECK-NEXT: vdiv.bu $vr0, $vr1, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <16 x i8>, ptr %a0
+ %v1 = load <16 x i8>, ptr %a1
+ %v2 = udiv <16 x i8> %v0, %v1
+ store <16 x i8> %v2, ptr %res
+ ret void
+}
+
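+;; A udiv by a splat power of two needs no real division: for unsigned x,
+;; x / 8 == x >> 3, so the *_8 cases below are expected to select
+;; vsrli.{b,h,w,d} with immediate 3 instead of vdiv.
+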
+define void @udiv_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: udiv_v8i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a2, 0
+; CHECK-NEXT: vld $vr1, $a1, 0
+; CHECK-NEXT: vdiv.hu $vr0, $vr1, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <8 x i16>, ptr %a0
+ %v1 = load <8 x i16>, ptr %a1
+ %v2 = udiv <8 x i16> %v0, %v1
+ store <8 x i16> %v2, ptr %res
+ ret void
+}
+
+define void @udiv_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: udiv_v4i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a2, 0
+; CHECK-NEXT: vld $vr1, $a1, 0
+; CHECK-NEXT: vdiv.wu $vr0, $vr1, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <4 x i32>, ptr %a0
+ %v1 = load <4 x i32>, ptr %a1
+ %v2 = udiv <4 x i32> %v0, %v1
+ store <4 x i32> %v2, ptr %res
+ ret void
+}
+
+define void @udiv_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: udiv_v2i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a2, 0
+; CHECK-NEXT: vld $vr1, $a1, 0
+; CHECK-NEXT: vdiv.du $vr0, $vr1, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <2 x i64>, ptr %a0
+ %v1 = load <2 x i64>, ptr %a1
+ %v2 = udiv <2 x i64> %v0, %v1
+ store <2 x i64> %v2, ptr %res
+ ret void
+}
+
+define void @udiv_v16i8_8(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: udiv_v16i8_8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vsrli.b $vr0, $vr0, 3
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <16 x i8>, ptr %a0
+ %v1 = udiv <16 x i8> %v0, <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
+ store <16 x i8> %v1, ptr %res
+ ret void
+}
+
+define void @udiv_v8i16_8(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: udiv_v8i16_8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vsrli.h $vr0, $vr0, 3
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <8 x i16>, ptr %a0
+ %v1 = udiv <8 x i16> %v0, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+ store <8 x i16> %v1, ptr %res
+ ret void
+}
+
+define void @udiv_v4i32_8(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: udiv_v4i32_8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vsrli.w $vr0, $vr0, 3
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <4 x i32>, ptr %a0
+ %v1 = udiv <4 x i32> %v0, <i32 8, i32 8, i32 8, i32 8>
+ store <4 x i32> %v1, ptr %res
+ ret void
+}
+
+define void @udiv_v2i64_8(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: udiv_v2i64_8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vsrli.d $vr0, $vr0, 3
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <2 x i64>, ptr %a0
+ %v1 = udiv <2 x i64> %v0, <i64 8, i64 8>
+ store <2 x i64> %v1, ptr %res
+ ret void
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/uitofp.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/uitofp.ll
new file mode 100644
index 0000000000000000000000000000000000000000..3d4913f12e57e1400ba3a2700ab7a1a6f7ef7037
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/uitofp.ll
@@ -0,0 +1,28 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+define void @uitofp_v4i32_v4f32(ptr %res, ptr %in){
+; CHECK-LABEL: uitofp_v4i32_v4f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vffint.s.wu $vr0, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <4 x i32>, ptr %in
+ %v1 = uitofp <4 x i32> %v0 to <4 x float>
+ store <4 x float> %v1, ptr %res
+ ret void
+}
+
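+;; Unsigned int-to-float conversion maps directly onto vffint: the v4i32
+;; case above uses vffint.s.wu and the v2i64 case below uses vffint.d.lu.
+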
+define void @uitofp_v2i64_v2f64(ptr %res, ptr %in){
+; CHECK-LABEL: uitofp_v2i64_v2f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vffint.d.lu $vr0, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <2 x i64>, ptr %in
+ %v1 = uitofp <2 x i64> %v0 to <2 x double>
+ store <2 x double> %v1, ptr %res
+ ret void
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/xor.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/xor.ll
new file mode 100644
index 0000000000000000000000000000000000000000..ce3e49c990ffb014baac4bfbe2607b261c231dba
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/xor.ll
@@ -0,0 +1,125 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+define void @xor_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: xor_v16i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a2, 0
+; CHECK-NEXT: vld $vr1, $a1, 0
+; CHECK-NEXT: vxor.v $vr0, $vr1, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <16 x i8>, ptr %a0
+ %v1 = load <16 x i8>, ptr %a1
+ %v2 = xor <16 x i8> %v0, %v1
+ store <16 x i8> %v2, ptr %res
+ ret void
+}
+
+define void @xor_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: xor_v8i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a2, 0
+; CHECK-NEXT: vld $vr1, $a1, 0
+; CHECK-NEXT: vxor.v $vr0, $vr1, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <8 x i16>, ptr %a0
+ %v1 = load <8 x i16>, ptr %a1
+ %v2 = xor <8 x i16> %v0, %v1
+ store <8 x i16> %v2, ptr %res
+ ret void
+}
+
+define void @xor_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: xor_v4i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a2, 0
+; CHECK-NEXT: vld $vr1, $a1, 0
+; CHECK-NEXT: vxor.v $vr0, $vr1, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <4 x i32>, ptr %a0
+ %v1 = load <4 x i32>, ptr %a1
+ %v2 = xor <4 x i32> %v0, %v1
+ store <4 x i32> %v2, ptr %res
+ ret void
+}
+
+define void @xor_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: xor_v2i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a2, 0
+; CHECK-NEXT: vld $vr1, $a1, 0
+; CHECK-NEXT: vxor.v $vr0, $vr1, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <2 x i64>, ptr %a0
+ %v1 = load <2 x i64>, ptr %a1
+ %v2 = xor <2 x i64> %v0, %v1
+ store <2 x i64> %v2, ptr %res
+ ret void
+}
+
+define void @xor_u_v16i8(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: xor_u_v16i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vxori.b $vr0, $vr0, 31
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <16 x i8>, ptr %a0
+ %v1 = xor <16 x i8> %v0, <i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31, i8 31>
+ store <16 x i8> %v1, ptr %res
+ ret void
+}
+
+define void @xor_u_v8i16(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: xor_u_v8i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vrepli.h $vr1, 31
+; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <8 x i16>, ptr %a0
+ %v1 = xor <8 x i16> %v0, <i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31>
+ store <8 x i16> %v1, ptr %res
+ ret void
+}
+
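+;; Only the byte element type has an immediate xor (vxori.b), so the i8 case
+;; above folds splat(31) into the instruction, while the wider element types
+;; here first materialize the splat with vrepli.{h,w,d} and then use the
+;; register-register vxor.v.
+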
+define void @xor_u_v4i32(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: xor_u_v4i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vrepli.w $vr1, 31
+; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <4 x i32>, ptr %a0
+ %v1 = xor <4 x i32> %v0, <i32 31, i32 31, i32 31, i32 31>
+ store <4 x i32> %v1, ptr %res
+ ret void
+}
+
+define void @xor_u_v2i64(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: xor_u_v2i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vrepli.d $vr1, 31
+; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <2 x i64>, ptr %a0
+ %v1 = xor <2 x i64> %v0, <i64 31, i64 31>
+ store <2 x i64> %v1, ptr %res
+ ret void
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/mulh.ll b/llvm/test/CodeGen/LoongArch/lsx/mulh.ll
new file mode 100644
index 0000000000000000000000000000000000000000..e1388f00e355fb81d68f7c6da9b7bdaa6827b545
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/mulh.ll
@@ -0,0 +1,162 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+define void @mulhs_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: mulhs_v16i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a2, 0
+; CHECK-NEXT: vld $vr1, $a1, 0
+; CHECK-NEXT: vmuh.b $vr0, $vr1, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <16 x i8>, ptr %a0
+ %v1 = load <16 x i8>, ptr %a1
+ %v0s = sext <16 x i8> %v0 to <16 x i16>
+ %v1s = sext <16 x i8> %v1 to <16 x i16>
+ %m = mul <16 x i16> %v0s, %v1s
+ %s = ashr <16 x i16> %m, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+ %v2 = trunc <16 x i16> %s to <16 x i8>
+ store <16 x i8> %v2, ptr %res
+ ret void
+}
+
+define void @mulhu_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: mulhu_v16i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a2, 0
+; CHECK-NEXT: vld $vr1, $a1, 0
+; CHECK-NEXT: vmuh.bu $vr0, $vr1, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <16 x i8>, ptr %a0
+ %v1 = load <16 x i8>, ptr %a1
+ %v0z = zext <16 x i8> %v0 to <16 x i16>
+ %v1z = zext <16 x i8> %v1 to <16 x i16>
+ %m = mul <16 x i16> %v0z, %v1z
+ %s = lshr <16 x i16> %m, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+ %v2 = trunc <16 x i16> %s to <16 x i8>
+ store <16 x i8> %v2, ptr %res
+ ret void
+}
+
+define void @mulhs_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: mulhs_v8i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a2, 0
+; CHECK-NEXT: vld $vr1, $a1, 0
+; CHECK-NEXT: vmuh.h $vr0, $vr1, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <8 x i16>, ptr %a0
+ %v1 = load <8 x i16>, ptr %a1
+ %v0s = sext <8 x i16> %v0 to <8 x i32>
+ %v1s = sext <8 x i16> %v1 to <8 x i32>
+ %m = mul <8 x i32> %v0s, %v1s
+ %s = ashr <8 x i32> %m, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+ %v2 = trunc <8 x i32> %s to <8 x i16>
+ store <8 x i16> %v2, ptr %res
+ ret void
+}
+
+define void @mulhu_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: mulhu_v8i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a2, 0
+; CHECK-NEXT: vld $vr1, $a1, 0
+; CHECK-NEXT: vmuh.hu $vr0, $vr1, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <8 x i16>, ptr %a0
+ %v1 = load <8 x i16>, ptr %a1
+ %v0z = zext <8 x i16> %v0 to <8 x i32>
+ %v1z = zext <8 x i16> %v1 to <8 x i32>
+ %m = mul <8 x i32> %v0z, %v1z
+ %s = lshr <8 x i32> %m, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+ %v2 = trunc <8 x i32> %s to <8 x i16>
+ store <8 x i16> %v2, ptr %res
+ ret void
+}
+
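+;; Every test in this file implements the canonical high-half multiply, which
+;; is expected to be recognized as vmuh.{b,h,w,d}[u]: widen both inputs to 2N
+;; bits, multiply, shift the product right by N, and truncate, e.g. (sketch):
+;;   %xs = sext <4 x i32> %x to <4 x i64>
+;;   %ys = sext <4 x i32> %y to <4 x i64>
+;;   %m = mul <4 x i64> %xs, %ys
+;;   %h = ashr <4 x i64> %m, <i64 32, i64 32, i64 32, i64 32>
+;;   %r = trunc <4 x i64> %h to <4 x i32>
+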
+define void @mulhs_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: mulhs_v4i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a2, 0
+; CHECK-NEXT: vld $vr1, $a1, 0
+; CHECK-NEXT: vmuh.w $vr0, $vr1, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <4 x i32>, ptr %a0
+ %v1 = load <4 x i32>, ptr %a1
+ %v0s = sext <4 x i32> %v0 to <4 x i64>
+ %v1s = sext <4 x i32> %v1 to <4 x i64>
+ %m = mul <4 x i64> %v0s, %v1s
+ %s = ashr <4 x i64> %m, <i64 32, i64 32, i64 32, i64 32>
+ %v2 = trunc <4 x i64> %s to <4 x i32>
+ store <4 x i32> %v2, ptr %res
+ ret void
+}
+
+define void @mulhu_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: mulhu_v4i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a2, 0
+; CHECK-NEXT: vld $vr1, $a1, 0
+; CHECK-NEXT: vmuh.wu $vr0, $vr1, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <4 x i32>, ptr %a0
+ %v1 = load <4 x i32>, ptr %a1
+ %v0z = zext <4 x i32> %v0 to <4 x i64>
+ %v1z = zext <4 x i32> %v1 to <4 x i64>
+ %m = mul <4 x i64> %v0z, %v1z
+ %s = lshr <4 x i64> %m, <i64 32, i64 32, i64 32, i64 32>
+ %v2 = trunc <4 x i64> %s to <4 x i32>
+ store <4 x i32> %v2, ptr %res
+ ret void
+}
+
+define void @mulhs_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: mulhs_v2i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a2, 0
+; CHECK-NEXT: vld $vr1, $a1, 0
+; CHECK-NEXT: vmuh.d $vr0, $vr1, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <2 x i64>, ptr %a0
+ %v1 = load <2 x i64>, ptr %a1
+ %v0s = sext <2 x i64> %v0 to <2 x i128>
+ %v1s = sext <2 x i64> %v1 to <2 x i128>
+ %m = mul <2 x i128> %v0s, %v1s
+ %s = ashr <2 x i128> %m, <i128 64, i128 64>
+ %v2 = trunc <2 x i128> %s to <2 x i64>
+ store <2 x i64> %v2, ptr %res
+ ret void
+}
+
+define void @mulhu_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: mulhu_v2i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vld $vr0, $a2, 0
+; CHECK-NEXT: vld $vr1, $a1, 0
+; CHECK-NEXT: vmuh.du $vr0, $vr1, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+entry:
+ %v0 = load <2 x i64>, ptr %a0
+ %v1 = load <2 x i64>, ptr %a1
+ %v0z = zext <2 x i64> %v0 to <2 x i128>
+ %v1z = zext <2 x i64> %v1 to <2 x i128>
+ %m = mul <2 x i128> %v0z, %v1z
+ %s = lshr <2 x i128> %m, <i128 64, i128 64>
+ %v2 = trunc <2 x i128> %s to <2 x i64>
+ store <2 x i64> %v2, ptr %res
+ ret void
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/vselect.ll b/llvm/test/CodeGen/LoongArch/lsx/vselect.ll
new file mode 100644
index 0000000000000000000000000000000000000000..746152f0f0264190d5b3f3725dc477132512db88
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/vselect.ll
@@ -0,0 +1,86 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+define void @select_v16i8_imm(ptr %res, ptr %a0) nounwind {
+; CHECK-LABEL: select_v16i8_imm:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vrepli.h $vr1, -256
+; CHECK-NEXT: vbitseli.b $vr1, $vr0, 255
+; CHECK-NEXT: vst $vr1, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <16 x i8>, ptr %a0
+ %sel = select <16 x i1> <i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true>, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <16 x i8> %v0
+ store <16 x i8> %sel, ptr %res
+ ret void
+}
+
+define void @select_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: select_v16i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vld $vr1, $a2, 0
+; CHECK-NEXT: vrepli.h $vr2, -256
+; CHECK-NEXT: vbitsel.v $vr0, $vr1, $vr0, $vr2
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <16 x i8>, ptr %a0
+ %v1 = load <16 x i8>, ptr %a1
+ %sel = select <16 x i1> <i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true>, <16 x i8> %v0, <16 x i8> %v1
+ store <16 x i8> %sel, ptr %res
+ ret void
+}
+
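+;; The constant i1 selection masks are materialized as whole-vector bit
+;; patterns: when the expanded pattern is a splat of a wider element (the
+;; 0xFF00 halfwords above) it is built with vrepli.h or a replicated GPR,
+;; and when it is not splattable (select_v2i64 below) it is loaded from the
+;; constant pool.
+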
+define void @select_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: select_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vld $vr1, $a2, 0
+; CHECK-NEXT: lu12i.w $a1, -16
+; CHECK-NEXT: lu32i.d $a1, 0
+; CHECK-NEXT: vreplgr2vr.w $vr2, $a1
+; CHECK-NEXT: vbitsel.v $vr0, $vr1, $vr0, $vr2
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <8 x i16>, ptr %a0
+ %v1 = load <8 x i16>, ptr %a1
+ %sel = select <8 x i1> <i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true>, <8 x i16> %v0, <8 x i16> %v1
+ store <8 x i16> %sel, ptr %res
+ ret void
+}
+
+define void @select_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: select_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vld $vr0, $a1, 0
+; CHECK-NEXT: vld $vr1, $a2, 0
+; CHECK-NEXT: ori $a1, $zero, 0
+; CHECK-NEXT: lu32i.d $a1, -1
+; CHECK-NEXT: vreplgr2vr.d $vr2, $a1
+; CHECK-NEXT: vbitsel.v $vr0, $vr1, $vr0, $vr2
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <4 x i32>, ptr %a0
+ %v1 = load <4 x i32>, ptr %a1
+ %sel = select <4 x i1> <i1 false, i1 true, i1 false, i1 true>, <4 x i32> %v0, <4 x i32> %v1
+ store <4 x i32> %sel, ptr %res
+ ret void
+}
+
+define void @select_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind {
+; CHECK-LABEL: select_v2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: pcalau12i $a3, %pc_hi20(.LCPI4_0)
+; CHECK-NEXT: addi.d $a3, $a3, %pc_lo12(.LCPI4_0)
+; CHECK-NEXT: vld $vr0, $a3, 0
+; CHECK-NEXT: vld $vr1, $a1, 0
+; CHECK-NEXT: vld $vr2, $a2, 0
+; CHECK-NEXT: vbitsel.v $vr0, $vr2, $vr1, $vr0
+; CHECK-NEXT: vst $vr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <2 x i64>, ptr %a0
+ %v1 = load <2 x i64>, ptr %a1
+ %sel = select <2 x i1> <i1 false, i1 true>, <2 x i64> %v0, <2 x i64> %v1
+ store <2 x i64> %sel, ptr %res
+ ret void
+}
diff --git a/llvm/test/CodeGen/LoongArch/opt-pipeline.ll b/llvm/test/CodeGen/LoongArch/opt-pipeline.ll
index f4a01a52b544adeaf420af51064b864e54595391..aedc747d2d4ab64ef52b1e2fcb22b162ac498458 100644
--- a/llvm/test/CodeGen/LoongArch/opt-pipeline.ll
+++ b/llvm/test/CodeGen/LoongArch/opt-pipeline.ll
@@ -155,6 +155,7 @@
 ; CHECK-NEXT: Contiguously Lay Out Funclets
 ; CHECK-NEXT: StackMap Liveness Analysis
 ; CHECK-NEXT: Live DEBUG_VALUE analysis
+; CHECK-NEXT: LoongArch pseudo instruction expansion pass
 ; CHECK-NEXT: LoongArch atomic pseudo instruction expansion pass
 ; CHECK-NEXT: Lazy Machine Block Frequency Analysis
 ; CHECK-NEXT: Machine Optimization Remark Emitter
diff --git a/llvm/test/CodeGen/LoongArch/psabi-restricted-scheduling.ll b/llvm/test/CodeGen/LoongArch/psabi-restricted-scheduling.ll
new file mode 100644
index 0000000000000000000000000000000000000000..474436a0126b91eb03e71d494a77d518e8fff6cc
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/psabi-restricted-scheduling.ll
@@ -0,0 +1,168 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc --mtriple=loongarch64 --code-model=medium --post-RA-scheduler=0 < %s \
+; RUN: | FileCheck %s --check-prefix=MEDIUM_NO_SCH
+; RUN: llc --mtriple=loongarch64 --code-model=medium --post-RA-scheduler=1 < %s \
+; RUN: | FileCheck %s --check-prefix=MEDIUM_SCH
+; RUN: llc --mtriple=loongarch64 --code-model=large --post-RA-scheduler=0 < %s \
+; RUN: | FileCheck %s --check-prefix=LARGE_NO_SCH
+; RUN: llc --mtriple=loongarch64 --code-model=large --post-RA-scheduler=1 < %s \
+; RUN: | FileCheck %s --check-prefix=LARGE_SCH
+
+@g = dso_local global i64 zeroinitializer, align 4
+@G = global i64 zeroinitializer, align 4
+@gd = external thread_local global i64
+@ld = external thread_local(localdynamic) global i64
+@ie = external thread_local(initialexec) global i64
+
+declare ptr @bar(i64)
+
+define void @foo() nounwind {
+; MEDIUM_NO_SCH-LABEL: foo:
+; MEDIUM_NO_SCH: # %bb.0:
+; MEDIUM_NO_SCH-NEXT: addi.d $sp, $sp, -16
+; MEDIUM_NO_SCH-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+;
MEDIUM_NO_SCH-NEXT: pcalau12i $a0, %got_pc_hi20(G) +; MEDIUM_NO_SCH-NEXT: ld.d $a0, $a0, %got_pc_lo12(G) +; MEDIUM_NO_SCH-NEXT: ld.d $a0, $a0, 0 +; MEDIUM_NO_SCH-NEXT: pcalau12i $a0, %pc_hi20(g) +; MEDIUM_NO_SCH-NEXT: addi.d $a0, $a0, %pc_lo12(g) +; MEDIUM_NO_SCH-NEXT: ld.d $a0, $a0, 0 +; MEDIUM_NO_SCH-NEXT: ori $a0, $zero, 1 +; MEDIUM_NO_SCH-NEXT: pcaddu18i $ra, %call36(bar) +; MEDIUM_NO_SCH-NEXT: jirl $ra, $ra, 0 +; MEDIUM_NO_SCH-NEXT: pcalau12i $a0, %ie_pc_hi20(gd) +; MEDIUM_NO_SCH-NEXT: ld.d $a0, $a0, %ie_pc_lo12(gd) +; MEDIUM_NO_SCH-NEXT: ldx.d $a0, $a0, $tp +; MEDIUM_NO_SCH-NEXT: pcalau12i $a0, %ie_pc_hi20(ld) +; MEDIUM_NO_SCH-NEXT: ld.d $a0, $a0, %ie_pc_lo12(ld) +; MEDIUM_NO_SCH-NEXT: ldx.d $a0, $a0, $tp +; MEDIUM_NO_SCH-NEXT: pcalau12i $a0, %ie_pc_hi20(ie) +; MEDIUM_NO_SCH-NEXT: ld.d $a0, $a0, %ie_pc_lo12(ie) +; MEDIUM_NO_SCH-NEXT: ldx.d $a0, $a0, $tp +; MEDIUM_NO_SCH-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; MEDIUM_NO_SCH-NEXT: addi.d $sp, $sp, 16 +; MEDIUM_NO_SCH-NEXT: ret +; +; MEDIUM_SCH-LABEL: foo: +; MEDIUM_SCH: # %bb.0: +; MEDIUM_SCH-NEXT: addi.d $sp, $sp, -16 +; MEDIUM_SCH-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; MEDIUM_SCH-NEXT: pcalau12i $a0, %got_pc_hi20(G) +; MEDIUM_SCH-NEXT: ld.d $a0, $a0, %got_pc_lo12(G) +; MEDIUM_SCH-NEXT: ld.d $a0, $a0, 0 +; MEDIUM_SCH-NEXT: pcalau12i $a0, %pc_hi20(g) +; MEDIUM_SCH-NEXT: addi.d $a0, $a0, %pc_lo12(g) +; MEDIUM_SCH-NEXT: ld.d $a0, $a0, 0 +; MEDIUM_SCH-NEXT: ori $a0, $zero, 1 +; MEDIUM_SCH-NEXT: pcaddu18i $ra, %call36(bar) +; MEDIUM_SCH-NEXT: jirl $ra, $ra, 0 +; MEDIUM_SCH-NEXT: pcalau12i $a0, %ie_pc_hi20(gd) +; MEDIUM_SCH-NEXT: ld.d $a0, $a0, %ie_pc_lo12(gd) +; MEDIUM_SCH-NEXT: ldx.d $a0, $a0, $tp +; MEDIUM_SCH-NEXT: pcalau12i $a0, %ie_pc_hi20(ld) +; MEDIUM_SCH-NEXT: ld.d $a0, $a0, %ie_pc_lo12(ld) +; MEDIUM_SCH-NEXT: ldx.d $a0, $a0, $tp +; MEDIUM_SCH-NEXT: pcalau12i $a0, %ie_pc_hi20(ie) +; MEDIUM_SCH-NEXT: ld.d $a0, $a0, %ie_pc_lo12(ie) +; MEDIUM_SCH-NEXT: ldx.d $a0, $a0, $tp +; MEDIUM_SCH-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; MEDIUM_SCH-NEXT: addi.d $sp, $sp, 16 +; MEDIUM_SCH-NEXT: ret +; +; LARGE_NO_SCH-LABEL: foo: +; LARGE_NO_SCH: # %bb.0: +; LARGE_NO_SCH-NEXT: addi.d $sp, $sp, -16 +; LARGE_NO_SCH-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LARGE_NO_SCH-NEXT: pcalau12i $a0, %got_pc_hi20(G) +; LARGE_NO_SCH-NEXT: addi.d $t8, $zero, %got_pc_lo12(G) +; LARGE_NO_SCH-NEXT: lu32i.d $t8, %got64_pc_lo20(G) +; LARGE_NO_SCH-NEXT: lu52i.d $t8, $t8, %got64_pc_hi12(G) +; LARGE_NO_SCH-NEXT: ldx.d $a0, $t8, $a0 +; LARGE_NO_SCH-NEXT: ld.d $a0, $a0, 0 +; LARGE_NO_SCH-NEXT: pcalau12i $a0, %pc_hi20(g) +; LARGE_NO_SCH-NEXT: addi.d $t8, $zero, %pc_lo12(g) +; LARGE_NO_SCH-NEXT: lu32i.d $t8, %pc64_lo20(g) +; LARGE_NO_SCH-NEXT: lu52i.d $t8, $t8, %pc64_hi12(g) +; LARGE_NO_SCH-NEXT: add.d $a0, $t8, $a0 +; LARGE_NO_SCH-NEXT: ld.d $a0, $a0, 0 +; LARGE_NO_SCH-NEXT: ori $a0, $zero, 1 +; LARGE_NO_SCH-NEXT: pcalau12i $ra, %got_pc_hi20(bar) +; LARGE_NO_SCH-NEXT: addi.d $t8, $zero, %got_pc_lo12(bar) +; LARGE_NO_SCH-NEXT: lu32i.d $t8, %got64_pc_lo20(bar) +; LARGE_NO_SCH-NEXT: lu52i.d $t8, $t8, %got64_pc_hi12(bar) +; LARGE_NO_SCH-NEXT: ldx.d $ra, $t8, $ra +; LARGE_NO_SCH-NEXT: jirl $ra, $ra, 0 +; LARGE_NO_SCH-NEXT: pcalau12i $a0, %ie_pc_hi20(gd) +; LARGE_NO_SCH-NEXT: addi.d $t8, $zero, %ie_pc_lo12(gd) +; LARGE_NO_SCH-NEXT: lu32i.d $t8, %ie64_pc_lo20(gd) +; LARGE_NO_SCH-NEXT: lu52i.d $t8, $t8, %ie64_pc_hi12(gd) +; LARGE_NO_SCH-NEXT: ldx.d $a0, $t8, $a0 +; LARGE_NO_SCH-NEXT: ldx.d $a0, $a0, $tp +; LARGE_NO_SCH-NEXT: pcalau12i $a0, 
%ie_pc_hi20(ld) +; LARGE_NO_SCH-NEXT: addi.d $t8, $zero, %ie_pc_lo12(ld) +; LARGE_NO_SCH-NEXT: lu32i.d $t8, %ie64_pc_lo20(ld) +; LARGE_NO_SCH-NEXT: lu52i.d $t8, $t8, %ie64_pc_hi12(ld) +; LARGE_NO_SCH-NEXT: ldx.d $a0, $t8, $a0 +; LARGE_NO_SCH-NEXT: ldx.d $a0, $a0, $tp +; LARGE_NO_SCH-NEXT: pcalau12i $a0, %ie_pc_hi20(ie) +; LARGE_NO_SCH-NEXT: addi.d $t8, $zero, %ie_pc_lo12(ie) +; LARGE_NO_SCH-NEXT: lu32i.d $t8, %ie64_pc_lo20(ie) +; LARGE_NO_SCH-NEXT: lu52i.d $t8, $t8, %ie64_pc_hi12(ie) +; LARGE_NO_SCH-NEXT: ldx.d $a0, $t8, $a0 +; LARGE_NO_SCH-NEXT: ldx.d $a0, $a0, $tp +; LARGE_NO_SCH-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LARGE_NO_SCH-NEXT: addi.d $sp, $sp, 16 +; LARGE_NO_SCH-NEXT: ret +; +; LARGE_SCH-LABEL: foo: +; LARGE_SCH: # %bb.0: +; LARGE_SCH-NEXT: addi.d $sp, $sp, -16 +; LARGE_SCH-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LARGE_SCH-NEXT: pcalau12i $a0, %got_pc_hi20(G) +; LARGE_SCH-NEXT: addi.d $t8, $zero, %got_pc_lo12(G) +; LARGE_SCH-NEXT: lu32i.d $t8, %got64_pc_lo20(G) +; LARGE_SCH-NEXT: lu52i.d $t8, $t8, %got64_pc_hi12(G) +; LARGE_SCH-NEXT: ldx.d $a0, $t8, $a0 +; LARGE_SCH-NEXT: ld.d $a0, $a0, 0 +; LARGE_SCH-NEXT: pcalau12i $a0, %pc_hi20(g) +; LARGE_SCH-NEXT: addi.d $t8, $zero, %pc_lo12(g) +; LARGE_SCH-NEXT: lu32i.d $t8, %pc64_lo20(g) +; LARGE_SCH-NEXT: lu52i.d $t8, $t8, %pc64_hi12(g) +; LARGE_SCH-NEXT: add.d $a0, $t8, $a0 +; LARGE_SCH-NEXT: ld.d $a0, $a0, 0 +; LARGE_SCH-NEXT: ori $a0, $zero, 1 +; LARGE_SCH-NEXT: pcalau12i $ra, %got_pc_hi20(bar) +; LARGE_SCH-NEXT: addi.d $t8, $zero, %got_pc_lo12(bar) +; LARGE_SCH-NEXT: lu32i.d $t8, %got64_pc_lo20(bar) +; LARGE_SCH-NEXT: lu52i.d $t8, $t8, %got64_pc_hi12(bar) +; LARGE_SCH-NEXT: ldx.d $ra, $t8, $ra +; LARGE_SCH-NEXT: jirl $ra, $ra, 0 +; LARGE_SCH-NEXT: pcalau12i $a0, %ie_pc_hi20(gd) +; LARGE_SCH-NEXT: addi.d $t8, $zero, %ie_pc_lo12(gd) +; LARGE_SCH-NEXT: lu32i.d $t8, %ie64_pc_lo20(gd) +; LARGE_SCH-NEXT: lu52i.d $t8, $t8, %ie64_pc_hi12(gd) +; LARGE_SCH-NEXT: ldx.d $a0, $t8, $a0 +; LARGE_SCH-NEXT: ldx.d $a0, $a0, $tp +; LARGE_SCH-NEXT: pcalau12i $a0, %ie_pc_hi20(ld) +; LARGE_SCH-NEXT: addi.d $t8, $zero, %ie_pc_lo12(ld) +; LARGE_SCH-NEXT: lu32i.d $t8, %ie64_pc_lo20(ld) +; LARGE_SCH-NEXT: lu52i.d $t8, $t8, %ie64_pc_hi12(ld) +; LARGE_SCH-NEXT: ldx.d $a0, $t8, $a0 +; LARGE_SCH-NEXT: ldx.d $a0, $a0, $tp +; LARGE_SCH-NEXT: pcalau12i $a0, %ie_pc_hi20(ie) +; LARGE_SCH-NEXT: addi.d $t8, $zero, %ie_pc_lo12(ie) +; LARGE_SCH-NEXT: lu32i.d $t8, %ie64_pc_lo20(ie) +; LARGE_SCH-NEXT: lu52i.d $t8, $t8, %ie64_pc_hi12(ie) +; LARGE_SCH-NEXT: ldx.d $a0, $t8, $a0 +; LARGE_SCH-NEXT: ldx.d $a0, $a0, $tp +; LARGE_SCH-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LARGE_SCH-NEXT: addi.d $sp, $sp, 16 +; LARGE_SCH-NEXT: ret + %V = load volatile i64, ptr @G + %v = load volatile i64, ptr @g + call void @bar(i64 1) + %v_gd = load volatile i64, ptr @gd + %v_ld = load volatile i64, ptr @ld + %v_ie = load volatile i64, ptr @ie + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/smul-with-overflow.ll b/llvm/test/CodeGen/LoongArch/smul-with-overflow.ll new file mode 100644 index 0000000000000000000000000000000000000000..6cba4108d63c6f8ec5875b2bec8b73a608a1d6d8 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/smul-with-overflow.ll @@ -0,0 +1,439 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefix=LA32 +; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64 + +define zeroext i1 @smuloi64(i64 %v1, i64 %v2, ptr %res) { +; LA32-LABEL: 
smuloi64: +; LA32: # %bb.0: +; LA32-NEXT: srai.w $a5, $a1, 31 +; LA32-NEXT: mul.w $a6, $a2, $a5 +; LA32-NEXT: mulh.wu $a7, $a2, $a5 +; LA32-NEXT: add.w $a7, $a7, $a6 +; LA32-NEXT: mul.w $a5, $a3, $a5 +; LA32-NEXT: add.w $a5, $a7, $a5 +; LA32-NEXT: srai.w $a7, $a3, 31 +; LA32-NEXT: mul.w $t0, $a7, $a1 +; LA32-NEXT: mulh.wu $t1, $a7, $a0 +; LA32-NEXT: add.w $t0, $t1, $t0 +; LA32-NEXT: mul.w $a7, $a7, $a0 +; LA32-NEXT: add.w $t0, $t0, $a7 +; LA32-NEXT: add.w $a5, $t0, $a5 +; LA32-NEXT: mulh.wu $t0, $a0, $a2 +; LA32-NEXT: mul.w $t1, $a1, $a2 +; LA32-NEXT: add.w $t0, $t1, $t0 +; LA32-NEXT: sltu $t1, $t0, $t1 +; LA32-NEXT: mulh.wu $t2, $a1, $a2 +; LA32-NEXT: add.w $t1, $t2, $t1 +; LA32-NEXT: mul.w $t2, $a0, $a3 +; LA32-NEXT: add.w $t0, $t2, $t0 +; LA32-NEXT: sltu $t2, $t0, $t2 +; LA32-NEXT: mulh.wu $t3, $a0, $a3 +; LA32-NEXT: add.w $t2, $t3, $t2 +; LA32-NEXT: add.w $a6, $a7, $a6 +; LA32-NEXT: sltu $a7, $a6, $a7 +; LA32-NEXT: add.w $a5, $a5, $a7 +; LA32-NEXT: mul.w $a0, $a0, $a2 +; LA32-NEXT: mul.w $a2, $a1, $a3 +; LA32-NEXT: mulh.wu $a1, $a1, $a3 +; LA32-NEXT: add.w $a3, $t1, $t2 +; LA32-NEXT: sltu $a7, $a3, $t1 +; LA32-NEXT: add.w $a1, $a1, $a7 +; LA32-NEXT: st.w $a0, $a4, 0 +; LA32-NEXT: add.w $a0, $a2, $a3 +; LA32-NEXT: sltu $a2, $a0, $a2 +; LA32-NEXT: add.w $a1, $a1, $a2 +; LA32-NEXT: st.w $t0, $a4, 4 +; LA32-NEXT: add.w $a1, $a1, $a5 +; LA32-NEXT: add.w $a2, $a0, $a6 +; LA32-NEXT: sltu $a0, $a2, $a0 +; LA32-NEXT: add.w $a0, $a1, $a0 +; LA32-NEXT: srai.w $a1, $t0, 31 +; LA32-NEXT: xor $a0, $a0, $a1 +; LA32-NEXT: xor $a1, $a2, $a1 +; LA32-NEXT: or $a0, $a1, $a0 +; LA32-NEXT: sltu $a0, $zero, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: smuloi64: +; LA64: # %bb.0: +; LA64-NEXT: mul.d $a3, $a0, $a1 +; LA64-NEXT: st.d $a3, $a2, 0 +; LA64-NEXT: mulh.d $a0, $a0, $a1 +; LA64-NEXT: srai.d $a1, $a3, 63 +; LA64-NEXT: xor $a0, $a0, $a1 +; LA64-NEXT: sltu $a0, $zero, $a0 +; LA64-NEXT: ret + %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, ptr %res + ret i1 %obit +} + +define zeroext i1 @smuloi128(i128 %v1, i128 %v2, ptr %res) { +; LA32-LABEL: smuloi128: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -96 +; LA32-NEXT: .cfi_def_cfa_offset 96 +; LA32-NEXT: st.w $ra, $sp, 92 # 4-byte Folded Spill +; LA32-NEXT: st.w $fp, $sp, 88 # 4-byte Folded Spill +; LA32-NEXT: st.w $s0, $sp, 84 # 4-byte Folded Spill +; LA32-NEXT: st.w $s1, $sp, 80 # 4-byte Folded Spill +; LA32-NEXT: st.w $s2, $sp, 76 # 4-byte Folded Spill +; LA32-NEXT: st.w $s3, $sp, 72 # 4-byte Folded Spill +; LA32-NEXT: st.w $s4, $sp, 68 # 4-byte Folded Spill +; LA32-NEXT: st.w $s5, $sp, 64 # 4-byte Folded Spill +; LA32-NEXT: st.w $s6, $sp, 60 # 4-byte Folded Spill +; LA32-NEXT: st.w $s7, $sp, 56 # 4-byte Folded Spill +; LA32-NEXT: st.w $s8, $sp, 52 # 4-byte Folded Spill +; LA32-NEXT: .cfi_offset 1, -4 +; LA32-NEXT: .cfi_offset 22, -8 +; LA32-NEXT: .cfi_offset 23, -12 +; LA32-NEXT: .cfi_offset 24, -16 +; LA32-NEXT: .cfi_offset 25, -20 +; LA32-NEXT: .cfi_offset 26, -24 +; LA32-NEXT: .cfi_offset 27, -28 +; LA32-NEXT: .cfi_offset 28, -32 +; LA32-NEXT: .cfi_offset 29, -36 +; LA32-NEXT: .cfi_offset 30, -40 +; LA32-NEXT: .cfi_offset 31, -44 +; LA32-NEXT: st.w $a2, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: ld.w $a6, $a1, 0 +; LA32-NEXT: ld.w $a7, $a0, 0 +; LA32-NEXT: mulh.wu $a3, $a7, $a6 +; LA32-NEXT: ld.w $a5, $a0, 4 +; LA32-NEXT: mul.w $a4, $a5, $a6 +; LA32-NEXT: add.w $a3, $a4, $a3 +; LA32-NEXT: sltu $a4, $a3, $a4 +; LA32-NEXT: mulh.wu $t0, $a5, 
$a6 +; LA32-NEXT: add.w $a4, $t0, $a4 +; LA32-NEXT: ld.w $t0, $a1, 4 +; LA32-NEXT: mul.w $t1, $a7, $t0 +; LA32-NEXT: add.w $a3, $t1, $a3 +; LA32-NEXT: st.w $a3, $sp, 44 # 4-byte Folded Spill +; LA32-NEXT: sltu $t1, $a3, $t1 +; LA32-NEXT: mulh.wu $t2, $a7, $t0 +; LA32-NEXT: add.w $t1, $t2, $t1 +; LA32-NEXT: ld.w $t4, $a0, 12 +; LA32-NEXT: ld.w $t2, $a0, 8 +; LA32-NEXT: ld.w $t3, $a1, 8 +; LA32-NEXT: mulh.wu $a0, $t2, $t3 +; LA32-NEXT: mul.w $t5, $t4, $t3 +; LA32-NEXT: add.w $a0, $t5, $a0 +; LA32-NEXT: sltu $t5, $a0, $t5 +; LA32-NEXT: mulh.wu $t6, $t4, $t3 +; LA32-NEXT: add.w $t5, $t6, $t5 +; LA32-NEXT: ld.w $t7, $a1, 12 +; LA32-NEXT: mul.w $a1, $t2, $t7 +; LA32-NEXT: add.w $a0, $a1, $a0 +; LA32-NEXT: st.w $a0, $sp, 48 # 4-byte Folded Spill +; LA32-NEXT: sltu $a1, $a0, $a1 +; LA32-NEXT: mulh.wu $t6, $t2, $t7 +; LA32-NEXT: add.w $t6, $t6, $a1 +; LA32-NEXT: srai.w $s7, $t4, 31 +; LA32-NEXT: mul.w $a1, $s7, $t7 +; LA32-NEXT: mulh.wu $t8, $s7, $t3 +; LA32-NEXT: add.w $t8, $t8, $a1 +; LA32-NEXT: mulh.wu $fp, $a6, $s7 +; LA32-NEXT: mul.w $s6, $t0, $s7 +; LA32-NEXT: add.w $s8, $s6, $fp +; LA32-NEXT: mul.w $a1, $a6, $s7 +; LA32-NEXT: add.w $ra, $a1, $s8 +; LA32-NEXT: sltu $s0, $ra, $a1 +; LA32-NEXT: add.w $a0, $fp, $s0 +; LA32-NEXT: add.w $a3, $a4, $t1 +; LA32-NEXT: st.w $a3, $sp, 20 # 4-byte Folded Spill +; LA32-NEXT: sltu $a4, $a3, $a4 +; LA32-NEXT: mulh.wu $t1, $a5, $t0 +; LA32-NEXT: add.w $a3, $t1, $a4 +; LA32-NEXT: st.w $a3, $sp, 28 # 4-byte Folded Spill +; LA32-NEXT: srai.w $s4, $t7, 31 +; LA32-NEXT: mul.w $fp, $a7, $s4 +; LA32-NEXT: mulh.wu $a4, $a7, $s4 +; LA32-NEXT: add.w $s1, $a4, $fp +; LA32-NEXT: sltu $s0, $s1, $fp +; LA32-NEXT: add.w $s5, $a4, $s0 +; LA32-NEXT: mul.w $a4, $s7, $t3 +; LA32-NEXT: add.w $t8, $t8, $a4 +; LA32-NEXT: add.w $s0, $ra, $t8 +; LA32-NEXT: add.w $a3, $a1, $a4 +; LA32-NEXT: st.w $a3, $sp, 32 # 4-byte Folded Spill +; LA32-NEXT: sltu $a4, $a3, $a1 +; LA32-NEXT: add.w $a3, $s0, $a4 +; LA32-NEXT: st.w $a3, $sp, 24 # 4-byte Folded Spill +; LA32-NEXT: add.w $s3, $t5, $t6 +; LA32-NEXT: sltu $a4, $s3, $t5 +; LA32-NEXT: mulh.wu $t5, $t4, $t7 +; LA32-NEXT: add.w $a3, $t5, $a4 +; LA32-NEXT: st.w $a3, $sp, 16 # 4-byte Folded Spill +; LA32-NEXT: mul.w $a4, $a7, $a6 +; LA32-NEXT: st.w $a4, $a2, 0 +; LA32-NEXT: sltu $a4, $s8, $s6 +; LA32-NEXT: mulh.wu $t5, $t0, $s7 +; LA32-NEXT: add.w $a4, $t5, $a4 +; LA32-NEXT: add.w $t1, $a4, $a0 +; LA32-NEXT: sltu $a4, $t1, $a4 +; LA32-NEXT: add.w $s2, $t5, $a4 +; LA32-NEXT: mulh.wu $a4, $a7, $t3 +; LA32-NEXT: mul.w $t5, $a5, $t3 +; LA32-NEXT: add.w $a4, $t5, $a4 +; LA32-NEXT: sltu $t5, $a4, $t5 +; LA32-NEXT: mulh.wu $t6, $a5, $t3 +; LA32-NEXT: add.w $a3, $t6, $t5 +; LA32-NEXT: mul.w $t6, $a7, $t7 +; LA32-NEXT: add.w $t5, $t6, $a4 +; LA32-NEXT: sltu $a4, $t5, $t6 +; LA32-NEXT: mulh.wu $t6, $a7, $t7 +; LA32-NEXT: add.w $a4, $t6, $a4 +; LA32-NEXT: mulh.wu $t6, $t2, $a6 +; LA32-NEXT: mul.w $s7, $t4, $a6 +; LA32-NEXT: add.w $t6, $s7, $t6 +; LA32-NEXT: sltu $s7, $t6, $s7 +; LA32-NEXT: mulh.wu $s8, $t4, $a6 +; LA32-NEXT: add.w $a0, $s8, $s7 +; LA32-NEXT: mul.w $s7, $t2, $t0 +; LA32-NEXT: add.w $t6, $s7, $t6 +; LA32-NEXT: sltu $s7, $t6, $s7 +; LA32-NEXT: mulh.wu $s8, $t2, $t0 +; LA32-NEXT: add.w $a2, $s8, $s7 +; LA32-NEXT: mul.w $s8, $a5, $s4 +; LA32-NEXT: add.w $s7, $s1, $s8 +; LA32-NEXT: add.w $s1, $s7, $ra +; LA32-NEXT: add.w $a1, $fp, $a1 +; LA32-NEXT: st.w $a1, $sp, 40 # 4-byte Folded Spill +; LA32-NEXT: sltu $ra, $a1, $fp +; LA32-NEXT: add.w $a1, $s1, $ra +; LA32-NEXT: st.w $a1, $sp, 36 # 4-byte Folded Spill +; LA32-NEXT: xor $s0, $a1, $s7 +; 
LA32-NEXT: sltui $s0, $s0, 1 +; LA32-NEXT: sltu $a1, $a1, $s7 +; LA32-NEXT: masknez $s1, $a1, $s0 +; LA32-NEXT: maskeqz $s0, $ra, $s0 +; LA32-NEXT: add.w $t1, $s6, $t1 +; LA32-NEXT: sltu $s6, $t1, $s6 +; LA32-NEXT: add.w $s2, $s2, $s6 +; LA32-NEXT: add.w $a2, $a0, $a2 +; LA32-NEXT: sltu $a0, $a2, $a0 +; LA32-NEXT: mulh.wu $s6, $t4, $t0 +; LA32-NEXT: add.w $t8, $s6, $a0 +; LA32-NEXT: add.w $a4, $a3, $a4 +; LA32-NEXT: sltu $a3, $a4, $a3 +; LA32-NEXT: mulh.wu $s6, $a5, $t7 +; LA32-NEXT: add.w $a3, $s6, $a3 +; LA32-NEXT: mul.w $s6, $t4, $t7 +; LA32-NEXT: mul.w $t7, $a5, $t7 +; LA32-NEXT: mul.w $ra, $t4, $t0 +; LA32-NEXT: mul.w $t0, $a5, $t0 +; LA32-NEXT: mul.w $t4, $t4, $s4 +; LA32-NEXT: mul.w $a7, $a7, $t3 +; LA32-NEXT: mul.w $a6, $t2, $a6 +; LA32-NEXT: mul.w $t3, $t2, $t3 +; LA32-NEXT: mul.w $a0, $t2, $s4 +; LA32-NEXT: mulh.wu $t2, $t2, $s4 +; LA32-NEXT: mulh.wu $a5, $s4, $a5 +; LA32-NEXT: sltu $s4, $s7, $s8 +; LA32-NEXT: add.w $s4, $a5, $s4 +; LA32-NEXT: add.w $s4, $s5, $s4 +; LA32-NEXT: sltu $s5, $s4, $s5 +; LA32-NEXT: add.w $s5, $a5, $s5 +; LA32-NEXT: ld.w $a1, $sp, 20 # 4-byte Folded Reload +; LA32-NEXT: add.w $a1, $t0, $a1 +; LA32-NEXT: sltu $a5, $a1, $t0 +; LA32-NEXT: ld.w $t0, $sp, 28 # 4-byte Folded Reload +; LA32-NEXT: add.w $t0, $t0, $a5 +; LA32-NEXT: or $s0, $s0, $s1 +; LA32-NEXT: add.w $a4, $t7, $a4 +; LA32-NEXT: sltu $a5, $a4, $t7 +; LA32-NEXT: add.w $t7, $a3, $a5 +; LA32-NEXT: add.w $s1, $ra, $a2 +; LA32-NEXT: sltu $a2, $s1, $ra +; LA32-NEXT: add.w $t8, $t8, $a2 +; LA32-NEXT: add.w $a5, $s6, $s3 +; LA32-NEXT: sltu $a2, $a5, $s6 +; LA32-NEXT: ld.w $a3, $sp, 16 # 4-byte Folded Reload +; LA32-NEXT: add.w $a2, $a3, $a2 +; LA32-NEXT: ld.w $s6, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: ld.w $a3, $sp, 44 # 4-byte Folded Reload +; LA32-NEXT: st.w $a3, $s6, 4 +; LA32-NEXT: ld.w $a3, $sp, 24 # 4-byte Folded Reload +; LA32-NEXT: add.w $a3, $s2, $a3 +; LA32-NEXT: ld.w $s2, $sp, 32 # 4-byte Folded Reload +; LA32-NEXT: add.w $s2, $t1, $s2 +; LA32-NEXT: sltu $t1, $s2, $t1 +; LA32-NEXT: add.w $a3, $a3, $t1 +; LA32-NEXT: add.w $t1, $s8, $s4 +; LA32-NEXT: sltu $s3, $t1, $s8 +; LA32-NEXT: add.w $s3, $s5, $s3 +; LA32-NEXT: add.w $t2, $t2, $a0 +; LA32-NEXT: add.w $t2, $t2, $t4 +; LA32-NEXT: add.w $t2, $t2, $s7 +; LA32-NEXT: add.w $t4, $a0, $fp +; LA32-NEXT: sltu $a0, $t4, $a0 +; LA32-NEXT: add.w $a0, $t2, $a0 +; LA32-NEXT: add.w $a0, $s3, $a0 +; LA32-NEXT: add.w $t2, $t1, $t4 +; LA32-NEXT: sltu $t1, $t2, $t1 +; LA32-NEXT: add.w $a0, $a0, $t1 +; LA32-NEXT: add.w $a0, $a0, $a3 +; LA32-NEXT: add.w $t1, $t2, $s2 +; LA32-NEXT: sltu $a3, $t1, $t2 +; LA32-NEXT: add.w $a0, $a0, $a3 +; LA32-NEXT: add.w $a3, $t6, $t0 +; LA32-NEXT: add.w $a1, $a6, $a1 +; LA32-NEXT: sltu $a6, $a1, $a6 +; LA32-NEXT: add.w $t0, $a3, $a6 +; LA32-NEXT: add.w $a1, $a7, $a1 +; LA32-NEXT: sltu $a7, $a1, $a7 +; LA32-NEXT: add.w $a3, $t5, $t0 +; LA32-NEXT: add.w $a3, $a3, $a7 +; LA32-NEXT: sltu $t2, $a3, $t5 +; LA32-NEXT: xor $t4, $a3, $t5 +; LA32-NEXT: sltui $t4, $t4, 1 +; LA32-NEXT: masknez $t2, $t2, $t4 +; LA32-NEXT: maskeqz $a7, $a7, $t4 +; LA32-NEXT: st.w $a1, $s6, 8 +; LA32-NEXT: or $a1, $a7, $t2 +; LA32-NEXT: sltu $a7, $t0, $t6 +; LA32-NEXT: xor $t0, $t0, $t6 +; LA32-NEXT: sltui $t0, $t0, 1 +; LA32-NEXT: masknez $a7, $a7, $t0 +; LA32-NEXT: maskeqz $a6, $a6, $t0 +; LA32-NEXT: or $a6, $a6, $a7 +; LA32-NEXT: add.w $a6, $s1, $a6 +; LA32-NEXT: sltu $a7, $a6, $s1 +; LA32-NEXT: add.w $a7, $t8, $a7 +; LA32-NEXT: add.w $a1, $a4, $a1 +; LA32-NEXT: sltu $a4, $a1, $a4 +; LA32-NEXT: add.w $a4, $t7, $a4 +; LA32-NEXT: add.w $t0, $t1, $s0 +; 
LA32-NEXT: sltu $t1, $t0, $t1 +; LA32-NEXT: add.w $a0, $a0, $t1 +; LA32-NEXT: st.w $a3, $s6, 12 +; LA32-NEXT: add.w $a1, $a6, $a1 +; LA32-NEXT: sltu $a6, $a1, $a6 +; LA32-NEXT: add.w $a4, $a7, $a4 +; LA32-NEXT: add.w $a4, $a4, $a6 +; LA32-NEXT: sltu $t1, $a4, $a7 +; LA32-NEXT: xor $a7, $a4, $a7 +; LA32-NEXT: sltui $a7, $a7, 1 +; LA32-NEXT: masknez $t1, $t1, $a7 +; LA32-NEXT: maskeqz $a6, $a6, $a7 +; LA32-NEXT: or $a6, $a6, $t1 +; LA32-NEXT: add.w $a6, $a5, $a6 +; LA32-NEXT: sltu $a5, $a6, $a5 +; LA32-NEXT: add.w $a2, $a2, $a5 +; LA32-NEXT: ld.w $t1, $sp, 48 # 4-byte Folded Reload +; LA32-NEXT: add.w $a4, $t1, $a4 +; LA32-NEXT: add.w $a1, $t3, $a1 +; LA32-NEXT: sltu $a5, $a1, $t3 +; LA32-NEXT: add.w $a4, $a4, $a5 +; LA32-NEXT: sltu $a7, $a4, $t1 +; LA32-NEXT: xor $t1, $a4, $t1 +; LA32-NEXT: sltui $t1, $t1, 1 +; LA32-NEXT: masknez $a7, $a7, $t1 +; LA32-NEXT: maskeqz $a5, $a5, $t1 +; LA32-NEXT: or $a5, $a5, $a7 +; LA32-NEXT: add.w $a5, $a6, $a5 +; LA32-NEXT: sltu $a6, $a5, $a6 +; LA32-NEXT: add.w $a2, $a2, $a6 +; LA32-NEXT: add.w $a0, $a2, $a0 +; LA32-NEXT: add.w $a2, $a5, $t0 +; LA32-NEXT: sltu $a5, $a2, $a5 +; LA32-NEXT: add.w $a0, $a0, $a5 +; LA32-NEXT: ld.w $a5, $sp, 40 # 4-byte Folded Reload +; LA32-NEXT: add.w $a5, $a1, $a5 +; LA32-NEXT: sltu $a1, $a5, $a1 +; LA32-NEXT: ld.w $a6, $sp, 36 # 4-byte Folded Reload +; LA32-NEXT: add.w $a6, $a4, $a6 +; LA32-NEXT: add.w $a6, $a6, $a1 +; LA32-NEXT: sltu $a7, $a6, $a4 +; LA32-NEXT: xor $a4, $a6, $a4 +; LA32-NEXT: sltui $a4, $a4, 1 +; LA32-NEXT: masknez $a7, $a7, $a4 +; LA32-NEXT: maskeqz $a1, $a1, $a4 +; LA32-NEXT: or $a1, $a1, $a7 +; LA32-NEXT: add.w $a1, $a2, $a1 +; LA32-NEXT: sltu $a2, $a1, $a2 +; LA32-NEXT: add.w $a0, $a0, $a2 +; LA32-NEXT: srai.w $a2, $a3, 31 +; LA32-NEXT: xor $a3, $a6, $a2 +; LA32-NEXT: xor $a0, $a0, $a2 +; LA32-NEXT: or $a0, $a3, $a0 +; LA32-NEXT: xor $a3, $a5, $a2 +; LA32-NEXT: xor $a1, $a1, $a2 +; LA32-NEXT: or $a1, $a3, $a1 +; LA32-NEXT: or $a0, $a1, $a0 +; LA32-NEXT: sltu $a0, $zero, $a0 +; LA32-NEXT: ld.w $s8, $sp, 52 # 4-byte Folded Reload +; LA32-NEXT: ld.w $s7, $sp, 56 # 4-byte Folded Reload +; LA32-NEXT: ld.w $s6, $sp, 60 # 4-byte Folded Reload +; LA32-NEXT: ld.w $s5, $sp, 64 # 4-byte Folded Reload +; LA32-NEXT: ld.w $s4, $sp, 68 # 4-byte Folded Reload +; LA32-NEXT: ld.w $s3, $sp, 72 # 4-byte Folded Reload +; LA32-NEXT: ld.w $s2, $sp, 76 # 4-byte Folded Reload +; LA32-NEXT: ld.w $s1, $sp, 80 # 4-byte Folded Reload +; LA32-NEXT: ld.w $s0, $sp, 84 # 4-byte Folded Reload +; LA32-NEXT: ld.w $fp, $sp, 88 # 4-byte Folded Reload +; LA32-NEXT: ld.w $ra, $sp, 92 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 96 +; LA32-NEXT: ret +; +; LA64-LABEL: smuloi128: +; LA64: # %bb.0: +; LA64-NEXT: srai.d $a5, $a1, 63 +; LA64-NEXT: mul.d $a6, $a2, $a5 +; LA64-NEXT: mulh.du $a7, $a2, $a5 +; LA64-NEXT: add.d $a7, $a7, $a6 +; LA64-NEXT: mul.d $a5, $a3, $a5 +; LA64-NEXT: add.d $a5, $a7, $a5 +; LA64-NEXT: srai.d $a7, $a3, 63 +; LA64-NEXT: mul.d $t0, $a7, $a1 +; LA64-NEXT: mulh.du $t1, $a7, $a0 +; LA64-NEXT: add.d $t0, $t1, $t0 +; LA64-NEXT: mul.d $a7, $a7, $a0 +; LA64-NEXT: add.d $t0, $t0, $a7 +; LA64-NEXT: add.d $a5, $t0, $a5 +; LA64-NEXT: mulh.du $t0, $a0, $a2 +; LA64-NEXT: mul.d $t1, $a1, $a2 +; LA64-NEXT: add.d $t0, $t1, $t0 +; LA64-NEXT: sltu $t1, $t0, $t1 +; LA64-NEXT: mulh.du $t2, $a1, $a2 +; LA64-NEXT: add.d $t1, $t2, $t1 +; LA64-NEXT: mul.d $t2, $a0, $a3 +; LA64-NEXT: add.d $t0, $t2, $t0 +; LA64-NEXT: sltu $t2, $t0, $t2 +; LA64-NEXT: mulh.du $t3, $a0, $a3 +; LA64-NEXT: add.d $t2, $t3, $t2 +; LA64-NEXT: add.d $a6, $a7, 
$a6 +; LA64-NEXT: sltu $a7, $a6, $a7 +; LA64-NEXT: add.d $a5, $a5, $a7 +; LA64-NEXT: mul.d $a0, $a0, $a2 +; LA64-NEXT: mul.d $a2, $a1, $a3 +; LA64-NEXT: mulh.du $a1, $a1, $a3 +; LA64-NEXT: add.d $a3, $t1, $t2 +; LA64-NEXT: sltu $a7, $a3, $t1 +; LA64-NEXT: add.d $a1, $a1, $a7 +; LA64-NEXT: st.d $a0, $a4, 0 +; LA64-NEXT: add.d $a0, $a2, $a3 +; LA64-NEXT: sltu $a2, $a0, $a2 +; LA64-NEXT: add.d $a1, $a1, $a2 +; LA64-NEXT: st.d $t0, $a4, 8 +; LA64-NEXT: add.d $a1, $a1, $a5 +; LA64-NEXT: add.d $a2, $a0, $a6 +; LA64-NEXT: sltu $a0, $a2, $a0 +; LA64-NEXT: add.d $a0, $a1, $a0 +; LA64-NEXT: srai.d $a1, $t0, 63 +; LA64-NEXT: xor $a0, $a0, $a1 +; LA64-NEXT: xor $a1, $a2, $a1 +; LA64-NEXT: or $a0, $a1, $a0 +; LA64-NEXT: sltu $a0, $zero, $a0 +; LA64-NEXT: ret + %t = call {i128, i1} @llvm.smul.with.overflow.i128(i128 %v1, i128 %v2) + %val = extractvalue {i128, i1} %t, 0 + %obit = extractvalue {i128, i1} %t, 1 + store i128 %val, ptr %res + ret i1 %obit +} + +declare {i64, i1} @llvm.smul.with.overflow.i64(i64, i64) nounwind readnone +declare {i128, i1} @llvm.smul.with.overflow.i128(i128, i128) nounwind readnone diff --git a/llvm/test/CodeGen/LoongArch/statepoint-call-lowering-r1.ll b/llvm/test/CodeGen/LoongArch/statepoint-call-lowering-r1.ll new file mode 100644 index 0000000000000000000000000000000000000000..ee55ed337a28c1f49918114f83d0864815e4be76 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/statepoint-call-lowering-r1.ll @@ -0,0 +1,13 @@ +; RUN: llc --mtriple=loongarch64 --verify-machineinstrs --stop-after=prologepilog < %s | FileCheck %s + +;; Check that STATEPOINT instruction has an early clobber implicit def for R1. + +define void @test() gc "statepoint-example" { +entry: + %safepoint_token = tail call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 0, i32 0, ptr elementtype(void ()) @return_i1, i32 0, i32 0, i32 0, i32 0) ["gc-live" ()] +; CHECK: STATEPOINT 0, 0, 0, target-flags(loongarch-call-plt) @return_i1, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, csr_ilp32d_lp64d, implicit-def $r3, implicit-def dead early-clobber $r1 + ret void +} + +declare void @return_i1() +declare token @llvm.experimental.gc.statepoint.p0(i64, i32, ptr, i32, i32, ...) diff --git a/llvm/test/CodeGen/LoongArch/statepoint-call-lowering.ll b/llvm/test/CodeGen/LoongArch/statepoint-call-lowering.ll new file mode 100644 index 0000000000000000000000000000000000000000..e13b046a7d78e9ad893400470a2c76ac89f2e5ff --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/statepoint-call-lowering.ll @@ -0,0 +1,230 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc --mtriple=loongarch64 -verify-machineinstrs < %s | FileCheck %s +;; A collection of basic functionality tests for statepoint lowering - most +;; interesting cornercases are exercised through the x86 tests. + +%struct = type { i64, i64 } + +declare zeroext i1 @return_i1() +declare zeroext i32 @return_i32() +declare ptr @return_i32ptr() +declare float @return_float() +declare %struct @return_struct() +declare void @varargf(i32, ...) 
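+
+;; Each test below wraps one of the callees declared above in a statepoint
+;; and, where a value is returned, reads it back through the matching
+;; gc.result intrinsic. The common shape is (sketch, with T standing for the
+;; callee's return type):
+;;   %tok = call token (i64, i32, ptr, i32, i32, ...)
+;;            @llvm.experimental.gc.statepoint.p0(i64 0, i32 0,
+;;              ptr elementtype(T ()) @callee, i32 0, i32 0, i32 0, i32 0)
+;;   %val = call T @llvm.experimental.gc.result.T(token %tok)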
+ +define i1 @test_i1_return() nounwind gc "statepoint-example" { +;; This is just checking that an i1 gets lowered normally when there are no +;; extra state arguments to the statepoint +; CHECK-LABEL: test_i1_return: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi.d $sp, $sp, -16 +; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; CHECK-NEXT: bl %plt(return_i1) +; CHECK-NEXT: .Ltmp0: +; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 16 +; CHECK-NEXT: ret +entry: + %safepoint_token = tail call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 0, i32 0, ptr elementtype(i1 ()) @return_i1, i32 0, i32 0, i32 0, i32 0) + %call1 = call zeroext i1 @llvm.experimental.gc.result.i1(token %safepoint_token) + ret i1 %call1 +} + +define i32 @test_i32_return() nounwind gc "statepoint-example" { +; CHECK-LABEL: test_i32_return: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi.d $sp, $sp, -16 +; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; CHECK-NEXT: bl %plt(return_i32) +; CHECK-NEXT: .Ltmp1: +; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 16 +; CHECK-NEXT: ret +entry: + %safepoint_token = tail call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 0, i32 0, ptr elementtype(i32 ()) @return_i32, i32 0, i32 0, i32 0, i32 0) + %call1 = call zeroext i32 @llvm.experimental.gc.result.i32(token %safepoint_token) + ret i32 %call1 +} + +define ptr @test_i32ptr_return() nounwind gc "statepoint-example" { +; CHECK-LABEL: test_i32ptr_return: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi.d $sp, $sp, -16 +; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; CHECK-NEXT: bl %plt(return_i32ptr) +; CHECK-NEXT: .Ltmp2: +; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 16 +; CHECK-NEXT: ret +entry: + %safepoint_token = tail call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 0, i32 0, ptr elementtype(ptr ()) @return_i32ptr, i32 0, i32 0, i32 0, i32 0) + %call1 = call ptr @llvm.experimental.gc.result.p0(token %safepoint_token) + ret ptr %call1 +} + +define float @test_float_return() nounwind gc "statepoint-example" { +; CHECK-LABEL: test_float_return: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi.d $sp, $sp, -16 +; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; CHECK-NEXT: bl %plt(return_float) +; CHECK-NEXT: .Ltmp3: +; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 16 +; CHECK-NEXT: ret +entry: + %safepoint_token = tail call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 0, i32 0, ptr elementtype(float ()) @return_float, i32 0, i32 0, i32 0, i32 0) + %call1 = call float @llvm.experimental.gc.result.f32(token %safepoint_token) + ret float %call1 +} + +define %struct @test_struct_return() nounwind gc "statepoint-example" { +; CHECK-LABEL: test_struct_return: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi.d $sp, $sp, -16 +; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; CHECK-NEXT: bl %plt(return_struct) +; CHECK-NEXT: .Ltmp4: +; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 16 +; CHECK-NEXT: ret +entry: + %safepoint_token = tail call token (i64, i32, ptr, i32, i32, ...)
@llvm.experimental.gc.statepoint.p0(i64 0, i32 0, ptr elementtype(%struct ()) @return_struct, i32 0, i32 0, i32 0, i32 0) + %call1 = call %struct @llvm.experimental.gc.result.struct(token %safepoint_token) + ret %struct %call1 +} + +define i1 @test_relocate(ptr addrspace(1) %a) nounwind gc "statepoint-example" { +; CHECK-LABEL: test_relocate: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi.d $sp, $sp, -16 +; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; CHECK-NEXT: st.d $a0, $sp, 0 +; CHECK-NEXT: bl %plt(return_i1) +; CHECK-NEXT: .Ltmp5: +; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 16 +; CHECK-NEXT: ret +entry: + %safepoint_token = tail call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 0, i32 0, ptr elementtype(i1 ()) @return_i1, i32 0, i32 0, i32 0, i32 0) ["gc-live" (ptr addrspace(1) %a)] + %call1 = call ptr addrspace(1) @llvm.experimental.gc.relocate.p1(token %safepoint_token, i32 0, i32 0) + %call2 = call zeroext i1 @llvm.experimental.gc.result.i1(token %safepoint_token) + ret i1 %call2 +} + +define void @test_void_vararg() nounwind gc "statepoint-example" { +; CHECK-LABEL: test_void_vararg: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi.d $sp, $sp, -16 +; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; CHECK-NEXT: ori $a0, $zero, 42 +; CHECK-NEXT: ori $a1, $zero, 43 +; CHECK-NEXT: bl %plt(varargf) +; CHECK-NEXT: .Ltmp6: +; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 16 +; CHECK-NEXT: ret +entry: + %safepoint_token = tail call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 0, i32 0, ptr elementtype(void (i32, ...)) @varargf, i32 2, i32 0, i32 42, i32 43, i32 0, i32 0) + ;; if we try to use the result from a statepoint wrapping a + ;; non-void-returning varargf, we will experience a crash. + ret void +} + +define i1 @test_i1_return_patchable() nounwind gc "statepoint-example" { +;; A patchable variant of test_i1_return +; CHECK-LABEL: test_i1_return_patchable: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi.d $sp, $sp, -16 +; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; CHECK-NEXT: nop +; CHECK-NEXT: .Ltmp7: +; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 16 +; CHECK-NEXT: ret +entry: + %safepoint_token = tail call token (i64, i32, ptr, i32, i32, ...) 
@llvm.experimental.gc.statepoint.p0(i64 0, i32 4, ptr elementtype(i1 ()) null, i32 0, i32 0, i32 0, i32 0) + %call1 = call zeroext i1 @llvm.experimental.gc.result.i1(token %safepoint_token) + ret i1 %call1 +} + +declare void @consume(ptr addrspace(1) %obj) + +define i1 @test_cross_bb(ptr addrspace(1) %a, i1 %external_cond) nounwind gc "statepoint-example" { +; CHECK-LABEL: test_cross_bb: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi.d $sp, $sp, -32 +; CHECK-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill +; CHECK-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill +; CHECK-NEXT: st.d $s0, $sp, 8 # 8-byte Folded Spill +; CHECK-NEXT: move $s0, $a1 +; CHECK-NEXT: st.d $a0, $sp, 0 +; CHECK-NEXT: bl %plt(return_i1) +; CHECK-NEXT: .Ltmp8: +; CHECK-NEXT: move $fp, $a0 +; CHECK-NEXT: andi $a0, $s0, 1 +; CHECK-NEXT: beqz $a0, .LBB8_2 +; CHECK-NEXT: # %bb.1: # %left +; CHECK-NEXT: ld.d $a0, $sp, 0 +; CHECK-NEXT: bl %plt(consume) +; CHECK-NEXT: move $a0, $fp +; CHECK-NEXT: b .LBB8_3 +; CHECK-NEXT: .LBB8_2: # %right +; CHECK-NEXT: ori $a0, $zero, 1 +; CHECK-NEXT: .LBB8_3: # %right +; CHECK-NEXT: ld.d $s0, $sp, 8 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $fp, $sp, 16 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 32 +; CHECK-NEXT: ret +entry: + %safepoint_token = tail call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 0, i32 0, ptr elementtype(i1 ()) @return_i1, i32 0, i32 0, i32 0, i32 0) ["gc-live" (ptr addrspace(1) %a)] + br i1 %external_cond, label %left, label %right + +left: + %call1 = call ptr addrspace(1) @llvm.experimental.gc.relocate.p1(token %safepoint_token, i32 0, i32 0) + %call2 = call zeroext i1 @llvm.experimental.gc.result.i1(token %safepoint_token) + call void @consume(ptr addrspace(1) %call1) + ret i1 %call2 + +right: + ret i1 true +} + +%struct2 = type { i64, i64, i64 } + +declare void @consume_attributes(i32, ptr nest, i32, ptr byval(%struct2)) + +define void @test_attributes(ptr byval(%struct2) %s) nounwind gc "statepoint-example" { +; CHECK-LABEL: test_attributes: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi.d $sp, $sp, -32 +; CHECK-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill +; CHECK-NEXT: ld.d $a1, $a0, 16 +; CHECK-NEXT: st.d $a1, $sp, 16 +; CHECK-NEXT: ld.d $a1, $a0, 8 +; CHECK-NEXT: st.d $a1, $sp, 8 +; CHECK-NEXT: ld.d $a0, $a0, 0 +; CHECK-NEXT: st.d $a0, $sp, 0 +; CHECK-NEXT: ori $a0, $zero, 42 +; CHECK-NEXT: ori $a2, $zero, 17 +; CHECK-NEXT: addi.d $a3, $sp, 0 +; CHECK-NEXT: move $a1, $zero +; CHECK-NEXT: bl %plt(consume_attributes) +; CHECK-NEXT: .Ltmp9: +; CHECK-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 32 +; CHECK-NEXT: ret +entry: +;; We call a function that has a nest argument and a byval argument. + %statepoint_token = call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 0, i32 0, ptr elementtype(void (i32, ptr, i32, ptr)) @consume_attributes, i32 4, i32 0, i32 42, ptr nest null, i32 17, ptr byval(%struct2) %s, i32 0, i32 0) + ret void +} + +declare token @llvm.experimental.gc.statepoint.p0(i64, i32, ptr, i32, i32, ...) 
+declare i1 @llvm.experimental.gc.result.i1(token) +declare i32 @llvm.experimental.gc.result.i32(token) +declare ptr @llvm.experimental.gc.result.p0(token) +declare float @llvm.experimental.gc.result.f32(token) +declare %struct @llvm.experimental.gc.result.struct(token) +declare ptr addrspace(1) @llvm.experimental.gc.relocate.p1(token, i32, i32) diff --git a/llvm/test/CodeGen/LoongArch/test_bl_fixupkind.mir b/llvm/test/CodeGen/LoongArch/test_bl_fixupkind.mir new file mode 100644 index 0000000000000000000000000000000000000000..70cd5fb8d7eb690dbc0edc2f83ba65209f39c04f --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/test_bl_fixupkind.mir @@ -0,0 +1,62 @@ +# RUN: llc --mtriple=loongarch64 --filetype=obj %s -o - | \ +# RUN: llvm-objdump -d - | FileCheck %s + +# REQUIRES: asserts + +## Check that bl gets the correct fixup kind, whether or not the BL +## instruction carries target-flags(loongarch-call). + +--- | + target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128" + target triple = "loongarch64" + + define dso_local void @test_bl_fixupkind_with_flag() { + ; CHECK-LABEL: test_bl_fixupkind_with_flag + ; CHECK: addi.d $sp, $sp, -16 + ; CHECK-NEXT: st.d $ra, $sp, 8 + ; CHECK-NEXT: bl 0 + ; CHECK-NEXT: ld.d $ra, $sp, 8 + ; CHECK-NEXT: addi.d $sp, $sp, 16 + ; CHECK-NEXT: ret + entry: + call void @foo() + ret void + } + + define dso_local void @test_bl_fixupkind_without_flag() { + ; CHECK-LABEL: test_bl_fixupkind_without_flag + ; CHECK: addi.d $sp, $sp, -16 + ; CHECK-NEXT: st.d $ra, $sp, 8 + ; CHECK-NEXT: bl 0 + ; CHECK-NEXT: ld.d $ra, $sp, 8 + ; CHECK-NEXT: addi.d $sp, $sp, 16 + ; CHECK-NEXT: ret + entry: + call void @foo() + ret void + } + + declare dso_local void @foo(...) +... +--- +name: test_bl_fixupkind_with_flag +tracksRegLiveness: true +body: | + bb.0.entry: + ADJCALLSTACKDOWN 0, 0, implicit-def dead $r3, implicit $r3 + BL target-flags(loongarch-call) @foo, csr_ilp32d_lp64d, implicit-def $r1, implicit-def dead $r1, implicit-def $r3 + ADJCALLSTACKUP 0, 0, implicit-def dead $r3, implicit $r3 + PseudoRET + +... +--- +name: test_bl_fixupkind_without_flag +tracksRegLiveness: true +body: | + bb.0.entry: + ADJCALLSTACKDOWN 0, 0, implicit-def dead $r3, implicit $r3 + BL @foo, csr_ilp32d_lp64d, implicit-def $r1, implicit-def dead $r1, implicit-def $r3 + ADJCALLSTACKUP 0, 0, implicit-def dead $r3, implicit $r3 + PseudoRET + +... diff --git a/llvm/test/CodeGen/LoongArch/tls-models.ll b/llvm/test/CodeGen/LoongArch/tls-models.ll index d973cd45da0121f6869b50507c1ca2c423d4bbdb..3994df1da7163f1e2049b52d37f5d201ac18ef0c 100644 --- a/llvm/test/CodeGen/LoongArch/tls-models.ll +++ b/llvm/test/CodeGen/LoongArch/tls-models.ll @@ -1,8 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc --mtriple=loongarch32 --relocation-model=pic < %s | FileCheck %s --check-prefix=LA32PIC ; RUN: llc --mtriple=loongarch64 --relocation-model=pic < %s | FileCheck %s --check-prefix=LA64PIC +; RUN: llc --mtriple=loongarch64 --code-model=large --relocation-model=pic < %s | FileCheck %s --check-prefix=LA64LARGEPIC ; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefix=LA32NOPIC ; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64NOPIC +; RUN: llc --mtriple=loongarch64 --code-model=large < %s | FileCheck %s --check-prefix=LA64LARGENOPIC ;; Check that TLS symbols are lowered correctly based on the specified ;; model.
Make sure they're external to avoid them all being optimised to Local @@ -38,6 +40,25 @@ define ptr @f1() nounwind { ; LA64PIC-NEXT: addi.d $sp, $sp, 16 ; LA64PIC-NEXT: ret ; +; LA64LARGEPIC-LABEL: f1: +; LA64LARGEPIC: # %bb.0: # %entry +; LA64LARGEPIC-NEXT: addi.d $sp, $sp, -16 +; LA64LARGEPIC-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64LARGEPIC-NEXT: pcalau12i $a0, %gd_pc_hi20(unspecified) +; LA64LARGEPIC-NEXT: addi.d $t8, $zero, %got_pc_lo12(unspecified) +; LA64LARGEPIC-NEXT: lu32i.d $t8, %got64_pc_lo20(unspecified) +; LA64LARGEPIC-NEXT: lu52i.d $t8, $t8, %got64_pc_hi12(unspecified) +; LA64LARGEPIC-NEXT: add.d $a0, $t8, $a0 +; LA64LARGEPIC-NEXT: pcalau12i $ra, %pc_hi20(__tls_get_addr) +; LA64LARGEPIC-NEXT: addi.d $t8, $zero, %pc_lo12(__tls_get_addr) +; LA64LARGEPIC-NEXT: lu32i.d $t8, %pc64_lo20(__tls_get_addr) +; LA64LARGEPIC-NEXT: lu52i.d $t8, $t8, %pc64_hi12(__tls_get_addr) +; LA64LARGEPIC-NEXT: add.d $ra, $t8, $ra +; LA64LARGEPIC-NEXT: jirl $ra, $ra, 0 +; LA64LARGEPIC-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64LARGEPIC-NEXT: addi.d $sp, $sp, 16 +; LA64LARGEPIC-NEXT: ret +; ; LA32NOPIC-LABEL: f1: ; LA32NOPIC: # %bb.0: # %entry ; LA32NOPIC-NEXT: pcalau12i $a0, %ie_pc_hi20(unspecified) @@ -51,6 +72,16 @@ define ptr @f1() nounwind { ; LA64NOPIC-NEXT: ld.d $a0, $a0, %ie_pc_lo12(unspecified) ; LA64NOPIC-NEXT: add.d $a0, $a0, $tp ; LA64NOPIC-NEXT: ret +; +; LA64LARGENOPIC-LABEL: f1: +; LA64LARGENOPIC: # %bb.0: # %entry +; LA64LARGENOPIC-NEXT: pcalau12i $a0, %ie_pc_hi20(unspecified) +; LA64LARGENOPIC-NEXT: addi.d $t8, $zero, %ie_pc_lo12(unspecified) +; LA64LARGENOPIC-NEXT: lu32i.d $t8, %ie64_pc_lo20(unspecified) +; LA64LARGENOPIC-NEXT: lu52i.d $t8, $t8, %ie64_pc_hi12(unspecified) +; LA64LARGENOPIC-NEXT: ldx.d $a0, $t8, $a0 +; LA64LARGENOPIC-NEXT: add.d $a0, $a0, $tp +; LA64LARGENOPIC-NEXT: ret entry: ret ptr @unspecified } @@ -80,6 +111,25 @@ define ptr @f2() nounwind { ; LA64PIC-NEXT: addi.d $sp, $sp, 16 ; LA64PIC-NEXT: ret ; +; LA64LARGEPIC-LABEL: f2: +; LA64LARGEPIC: # %bb.0: # %entry +; LA64LARGEPIC-NEXT: addi.d $sp, $sp, -16 +; LA64LARGEPIC-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64LARGEPIC-NEXT: pcalau12i $a0, %ld_pc_hi20(ld) +; LA64LARGEPIC-NEXT: addi.d $t8, $zero, %got_pc_lo12(ld) +; LA64LARGEPIC-NEXT: lu32i.d $t8, %got64_pc_lo20(ld) +; LA64LARGEPIC-NEXT: lu52i.d $t8, $t8, %got64_pc_hi12(ld) +; LA64LARGEPIC-NEXT: add.d $a0, $t8, $a0 +; LA64LARGEPIC-NEXT: pcalau12i $ra, %pc_hi20(__tls_get_addr) +; LA64LARGEPIC-NEXT: addi.d $t8, $zero, %pc_lo12(__tls_get_addr) +; LA64LARGEPIC-NEXT: lu32i.d $t8, %pc64_lo20(__tls_get_addr) +; LA64LARGEPIC-NEXT: lu52i.d $t8, $t8, %pc64_hi12(__tls_get_addr) +; LA64LARGEPIC-NEXT: add.d $ra, $t8, $ra +; LA64LARGEPIC-NEXT: jirl $ra, $ra, 0 +; LA64LARGEPIC-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64LARGEPIC-NEXT: addi.d $sp, $sp, 16 +; LA64LARGEPIC-NEXT: ret +; ; LA32NOPIC-LABEL: f2: ; LA32NOPIC: # %bb.0: # %entry ; LA32NOPIC-NEXT: pcalau12i $a0, %ie_pc_hi20(ld) @@ -93,6 +143,16 @@ define ptr @f2() nounwind { ; LA64NOPIC-NEXT: ld.d $a0, $a0, %ie_pc_lo12(ld) ; LA64NOPIC-NEXT: add.d $a0, $a0, $tp ; LA64NOPIC-NEXT: ret +; +; LA64LARGENOPIC-LABEL: f2: +; LA64LARGENOPIC: # %bb.0: # %entry +; LA64LARGENOPIC-NEXT: pcalau12i $a0, %ie_pc_hi20(ld) +; LA64LARGENOPIC-NEXT: addi.d $t8, $zero, %ie_pc_lo12(ld) +; LA64LARGENOPIC-NEXT: lu32i.d $t8, %ie64_pc_lo20(ld) +; LA64LARGENOPIC-NEXT: lu52i.d $t8, $t8, %ie64_pc_hi12(ld) +; LA64LARGENOPIC-NEXT: ldx.d $a0, $t8, $a0 +; LA64LARGENOPIC-NEXT: add.d $a0, $a0, $tp +; LA64LARGENOPIC-NEXT: ret 
entry: ret ptr @ld } @@ -114,6 +174,16 @@ define ptr @f3() nounwind { ; LA64PIC-NEXT: add.d $a0, $a0, $tp ; LA64PIC-NEXT: ret ; +; LA64LARGEPIC-LABEL: f3: +; LA64LARGEPIC: # %bb.0: # %entry +; LA64LARGEPIC-NEXT: pcalau12i $a0, %ie_pc_hi20(ie) +; LA64LARGEPIC-NEXT: addi.d $t8, $zero, %ie_pc_lo12(ie) +; LA64LARGEPIC-NEXT: lu32i.d $t8, %ie64_pc_lo20(ie) +; LA64LARGEPIC-NEXT: lu52i.d $t8, $t8, %ie64_pc_hi12(ie) +; LA64LARGEPIC-NEXT: ldx.d $a0, $t8, $a0 +; LA64LARGEPIC-NEXT: add.d $a0, $a0, $tp +; LA64LARGEPIC-NEXT: ret +; ; LA32NOPIC-LABEL: f3: ; LA32NOPIC: # %bb.0: # %entry ; LA32NOPIC-NEXT: pcalau12i $a0, %ie_pc_hi20(ie) @@ -127,6 +197,16 @@ define ptr @f3() nounwind { ; LA64NOPIC-NEXT: ld.d $a0, $a0, %ie_pc_lo12(ie) ; LA64NOPIC-NEXT: add.d $a0, $a0, $tp ; LA64NOPIC-NEXT: ret +; +; LA64LARGENOPIC-LABEL: f3: +; LA64LARGENOPIC: # %bb.0: # %entry +; LA64LARGENOPIC-NEXT: pcalau12i $a0, %ie_pc_hi20(ie) +; LA64LARGENOPIC-NEXT: addi.d $t8, $zero, %ie_pc_lo12(ie) +; LA64LARGENOPIC-NEXT: lu32i.d $t8, %ie64_pc_lo20(ie) +; LA64LARGENOPIC-NEXT: lu52i.d $t8, $t8, %ie64_pc_hi12(ie) +; LA64LARGENOPIC-NEXT: ldx.d $a0, $t8, $a0 +; LA64LARGENOPIC-NEXT: add.d $a0, $a0, $tp +; LA64LARGENOPIC-NEXT: ret entry: ret ptr @ie } @@ -148,6 +228,15 @@ define ptr @f4() nounwind { ; LA64PIC-NEXT: add.d $a0, $a0, $tp ; LA64PIC-NEXT: ret ; +; LA64LARGEPIC-LABEL: f4: +; LA64LARGEPIC: # %bb.0: # %entry +; LA64LARGEPIC-NEXT: lu12i.w $a0, %le_hi20(le) +; LA64LARGEPIC-NEXT: ori $a0, $a0, %le_lo12(le) +; LA64LARGEPIC-NEXT: lu32i.d $a0, %le64_lo20(le) +; LA64LARGEPIC-NEXT: lu52i.d $a0, $a0, %le64_hi12(le) +; LA64LARGEPIC-NEXT: add.d $a0, $a0, $tp +; LA64LARGEPIC-NEXT: ret +; ; LA32NOPIC-LABEL: f4: ; LA32NOPIC: # %bb.0: # %entry ; LA32NOPIC-NEXT: lu12i.w $a0, %le_hi20(le) @@ -161,6 +250,15 @@ define ptr @f4() nounwind { ; LA64NOPIC-NEXT: ori $a0, $a0, %le_lo12(le) ; LA64NOPIC-NEXT: add.d $a0, $a0, $tp ; LA64NOPIC-NEXT: ret +; +; LA64LARGENOPIC-LABEL: f4: +; LA64LARGENOPIC: # %bb.0: # %entry +; LA64LARGENOPIC-NEXT: lu12i.w $a0, %le_hi20(le) +; LA64LARGENOPIC-NEXT: ori $a0, $a0, %le_lo12(le) +; LA64LARGENOPIC-NEXT: lu32i.d $a0, %le64_lo20(le) +; LA64LARGENOPIC-NEXT: lu52i.d $a0, $a0, %le64_hi12(le) +; LA64LARGENOPIC-NEXT: add.d $a0, $a0, $tp +; LA64LARGENOPIC-NEXT: ret entry: ret ptr @le } diff --git a/llvm/test/CodeGen/LoongArch/webkit-jscc.ll b/llvm/test/CodeGen/LoongArch/webkit-jscc.ll new file mode 100644 index 0000000000000000000000000000000000000000..1a5270b03736d006239b53e5d9d7418e5c37b01d --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/webkit-jscc.ll @@ -0,0 +1,92 @@ +;; OHOS_LOCAL begin +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; REQUIRES: ark_gc_support +; RUN: llc --mtriple=loongarch64 --mattr=+d < %s | FileCheck %s + +;; 32-bit integers are only aligned to 4 bytes, even on loongarch64. +;; They are *not* promoted to i64. 
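+
+;; In the tests below the first integer argument arrives in $a0; every other
+;; argument is passed on the stack, so the CHECK lines pin down both the load
+;; width chosen for each type (ld.b/ld.h/ld.w/ld.d, fld.s/fld.d) and its
+;; stack offset.
+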
+define webkit_jscc i32 @simple_jscall(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: simple_jscall: +; CHECK: # %bb.0: +; CHECK-NEXT: ld.w $a1, $sp, 0 +; CHECK-NEXT: add.d $a0, $a0, $a1 +; CHECK-NEXT: ld.w $a1, $sp, 8 +; CHECK-NEXT: add.d $a0, $a0, $a1 +; CHECK-NEXT: ret + %ab = add i32 %a, %b + %abc = add i32 %ab, %c + ret i32 %abc +} + +define webkit_jscc i1 @test_i1(i1 %a, i1 %b) { +; CHECK-LABEL: test_i1: +; CHECK: # %bb.0: +; CHECK-NEXT: ld.d $a1, $sp, 0 +; CHECK-NEXT: add.d $a0, $a0, $a1 +; CHECK-NEXT: ret + %ab = add i1 %a, %b + ret i1 %ab +} + +define webkit_jscc i8 @test_i8(i8 %a, i8 %b) { +; CHECK-LABEL: test_i8: +; CHECK: # %bb.0: +; CHECK-NEXT: ld.b $a1, $sp, 0 +; CHECK-NEXT: add.d $a0, $a0, $a1 +; CHECK-NEXT: ret + %ab = add i8 %a, %b + ret i8 %ab +} + +define webkit_jscc i16 @test_i16(i16 %a, i16 %b) { +; CHECK-LABEL: test_i16: +; CHECK: # %bb.0: +; CHECK-NEXT: ld.h $a1, $sp, 0 +; CHECK-NEXT: add.d $a0, $a0, $a1 +; CHECK-NEXT: ret + %ab = add i16 %a, %b + ret i16 %ab +} + +define webkit_jscc i32 @test_i32(i32 %a, i32 %b) { +; CHECK-LABEL: test_i32: +; CHECK: # %bb.0: +; CHECK-NEXT: ld.w $a1, $sp, 0 +; CHECK-NEXT: add.d $a0, $a0, $a1 +; CHECK-NEXT: ret + %ab = add i32 %a, %b + ret i32 %ab +} + +define webkit_jscc i64 @test_i64(i64 %a, i64 %b) { +; CHECK-LABEL: test_i64: +; CHECK: # %bb.0: +; CHECK-NEXT: ld.d $a1, $sp, 0 +; CHECK-NEXT: add.d $a0, $a0, $a1 +; CHECK-NEXT: ret + %ab = add i64 %a, %b + ret i64 %ab +} + +define webkit_jscc float @test_float(float %a, float %b) { +; CHECK-LABEL: test_float: +; CHECK: # %bb.0: +; CHECK-NEXT: fld.s $fa0, $sp, 4 +; CHECK-NEXT: fld.s $fa1, $sp, 0 +; CHECK-NEXT: fadd.s $fa0, $fa1, $fa0 +; CHECK-NEXT: ret + %ab = fadd float %a, %b + ret float %ab +} + +define webkit_jscc double @test_double(double %a, double %b) { +; CHECK-LABEL: test_double: +; CHECK: # %bb.0: +; CHECK-NEXT: fld.d $fa0, $sp, 8 +; CHECK-NEXT: fld.d $fa1, $sp, 0 +; CHECK-NEXT: fadd.d $fa0, $fa1, $fa0 +; CHECK-NEXT: ret + %ab = fadd double %a, %b + ret double %ab +} +;; OHOS_LOCAL end diff --git a/llvm/test/ExecutionEngine/RuntimeDyld/LoongArch/ELF_LoongArch_relocations.s b/llvm/test/ExecutionEngine/RuntimeDyld/LoongArch/ELF_LoongArch_relocations.s new file mode 100644 index 0000000000000000000000000000000000000000..5867abd6a89a5d5b450d34e1af5dc0da488100ca --- /dev/null +++ b/llvm/test/ExecutionEngine/RuntimeDyld/LoongArch/ELF_LoongArch_relocations.s @@ -0,0 +1,89 @@ +# RUN: rm -rf %t && mkdir -p %t +# RUN: llvm-mc --triple=loongarch64 --filetype=obj -o %t/reloc.o %s +# RUN: llvm-rtdyld --triple=loongarch64 --verify --check=%s %t/reloc.o \ +# RUN: --map-section reloc.o,.got=0x21f00 \ +# RUN: --dummy-extern abs=0x0123456789abcdef \ +# RUN: --dummy-extern external_data=0x1234 + + .text + .globl main + .p2align 2 + .type main,@function +main: +## Check R_LARCH_ABS_HI20 +# rtdyld-check: *{4}(main) = 0x1513578c + lu12i.w $t0, %abs_hi20(abs) +## Check R_LARCH_ABS_LO12 +# rtdyld-check: *{4}(main + 4) = 0x03b7bd8c + ori $t0, $t0, %abs_lo12(abs) +## Check R_LARCH_ABS64_LO20 +# rtdyld-check: *{4}(main + 8) = 0x1668acec + lu32i.d $t0, %abs64_lo20(abs) +## Check R_LARCH_ABS64_HI12 +# rtdyld-check: *{4}(main + 12) = 0x0300498c + lu52i.d $t0, $t0, %abs64_hi12(abs) + ret + .size main, .-main + + .globl local_func + .p2align 2 + .type local_func,@function +local_func: + ret + .size local_func, .-local_func + + .globl local_func_call26 + .p2align 2 +local_func_call26: +## Check R_LARCH_B26 +# rtdyld-check: decode_operand(local_func_call26, 0)[27:0] = \ +# rtdyld-check: (local_func - 
local_func_call26)[27:0] + bl local_func + .size local_func_call26, .-local_func_call26 + + .globl test_pc_hi20 + .p2align 2 +test_pc_hi20: +## Check R_LARCH_PCALA_HI20 +# rtdyld-check: decode_operand(test_pc_hi20, 1)[19:0] = \ +# rtdyld-check: (named_data - test_pc_hi20)[31:12] + \ +# rtdyld-check: named_data[11:11] + pcalau12i $a0, %pc_hi20(named_data) + .size test_pc_hi20, .-test_pc_hi20 + + .globl test_pc_lo12 + .p2align 2 +test_pc_lo12: +## Check R_LARCH_PCALA_LO12 +# rtdyld-check: decode_operand(test_pc_lo12, 2)[11:0] = \ +# rtdyld-check: (named_data)[11:0] + addi.d $a0, $a0, %pc_lo12(named_data) + .size test_pc_lo12, .-test_pc_lo12 + + .globl test_got_pc_hi20 + .p2align 2 +test_got_pc_hi20: +## Check R_LARCH_GOT_PC_HI20 +# rtdyld-check: decode_operand(test_got_pc_hi20, 1)[19:0] = \ +# rtdyld-check: (section_addr(reloc.o, .got)[31:12] - \ +# rtdyld-check: test_got_pc_hi20[31:12] + \ +# rtdyld-check: section_addr(reloc.o, .got)[11:11]) + pcalau12i $a0, %got_pc_hi20(external_data) + .size test_got_pc_hi20, .-test_got_pc_hi20 + + .globl test_got_pc_lo12 + .p2align 2 +test_got_pc_lo12: +## Check R_LARCH_GOT_PC_LO12 +# rtdyld-check: decode_operand(test_got_pc_lo12, 2)[11:0] = \ +# rtdyld-check: (section_addr(reloc.o, .got)[11:0]) + ld.d $a0, $a0, %got_pc_lo12(external_data) + .size test_got_pc_lo12, .-test_got_pc_lo12 + + .globl named_data + .p2align 4 + .type named_data,@object +named_data: + .quad 0x2222222222222222 + .quad 0x3333333333333333 + .size named_data, .-named_data diff --git a/llvm/test/ExecutionEngine/RuntimeDyld/LoongArch/lit.local.cfg b/llvm/test/ExecutionEngine/RuntimeDyld/LoongArch/lit.local.cfg new file mode 100644 index 0000000000000000000000000000000000000000..cc24278acbb414ab5be93cffabda76082cc18a3a --- /dev/null +++ b/llvm/test/ExecutionEngine/RuntimeDyld/LoongArch/lit.local.cfg @@ -0,0 +1,2 @@ +if not "LoongArch" in config.root.targets: + config.unsupported = True diff --git a/llvm/test/MC/LoongArch/Basic/Float/d-arith.s b/llvm/test/MC/LoongArch/Basic/Float/d-arith.s index 5639ec886017ffc8854a700b019cbcb3668e6c62..6b2c67e9a2cc174dd03d5f43503fe60215c6aa6a 100644 --- a/llvm/test/MC/LoongArch/Basic/Float/d-arith.s +++ b/llvm/test/MC/LoongArch/Basic/Float/d-arith.s @@ -1,11 +1,11 @@ -# RUN: llvm-mc %s --triple=loongarch32 --mattr=+d --show-encoding \ +# RUN: llvm-mc %s --triple=loongarch32 --show-encoding \ # RUN: | FileCheck --check-prefixes=ASM-AND-OBJ,ASM %s -# RUN: llvm-mc %s --triple=loongarch64 --mattr=+d --show-encoding \ +# RUN: llvm-mc %s --triple=loongarch64 --show-encoding \ # RUN: | FileCheck --check-prefixes=ASM-AND-OBJ,ASM %s -# RUN: llvm-mc %s --triple=loongarch32 --mattr=+d --filetype=obj \ +# RUN: llvm-mc %s --triple=loongarch32 --filetype=obj \ # RUN: | llvm-objdump -d - \ # RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s -# RUN: llvm-mc %s --triple=loongarch64 --mattr=+d --filetype=obj \ +# RUN: llvm-mc %s --triple=loongarch64 --filetype=obj \ # RUN: | llvm-objdump -d - \ # RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s diff --git a/llvm/test/MC/LoongArch/Basic/Float/d-bound-check.s b/llvm/test/MC/LoongArch/Basic/Float/d-bound-check.s index bd625dc5549f8bba905eb41323d8f6a35be80af4..414d1329caf35ac383ed94846e792baba5175a9c 100644 --- a/llvm/test/MC/LoongArch/Basic/Float/d-bound-check.s +++ b/llvm/test/MC/LoongArch/Basic/Float/d-bound-check.s @@ -1,11 +1,11 @@ -# RUN: llvm-mc %s --triple=loongarch32 --mattr=+d --show-encoding \ +# RUN: llvm-mc %s --triple=loongarch32 --show-encoding \ # RUN: | FileCheck --check-prefixes=ASM-AND-OBJ,ASM
%s -# RUN: llvm-mc %s --triple=loongarch64 --mattr=+d --show-encoding \ +# RUN: llvm-mc %s --triple=loongarch64 --show-encoding \ # RUN: | FileCheck --check-prefixes=ASM-AND-OBJ,ASM %s -# RUN: llvm-mc %s --triple=loongarch32 --mattr=+d --filetype=obj \ +# RUN: llvm-mc %s --triple=loongarch32 --filetype=obj \ # RUN: | llvm-objdump -d - \ # RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s -# RUN: llvm-mc %s --triple=loongarch64 --mattr=+d --filetype=obj \ +# RUN: llvm-mc %s --triple=loongarch64 --filetype=obj \ # RUN: | llvm-objdump -d - \ # RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s diff --git a/llvm/test/MC/LoongArch/Basic/Float/d-branch.s b/llvm/test/MC/LoongArch/Basic/Float/d-branch.s index a310cb755fcdc29bce62fc8eb9e429b04b308029..06198d8e4d992c8a9cd035031bc5715edde30bdc 100644 --- a/llvm/test/MC/LoongArch/Basic/Float/d-branch.s +++ b/llvm/test/MC/LoongArch/Basic/Float/d-branch.s @@ -1,11 +1,11 @@ -# RUN: llvm-mc %s --triple=loongarch32 --mattr=+d --show-encoding \ +# RUN: llvm-mc %s --triple=loongarch32 --show-encoding \ # RUN: | FileCheck --check-prefixes=ASM-AND-OBJ,ASM %s -# RUN: llvm-mc %s --triple=loongarch64 --mattr=+d --show-encoding \ +# RUN: llvm-mc %s --triple=loongarch64 --show-encoding \ # RUN: | FileCheck --check-prefixes=ASM-AND-OBJ,ASM %s -# RUN: llvm-mc %s --triple=loongarch32 --mattr=+d --filetype=obj \ +# RUN: llvm-mc %s --triple=loongarch32 --filetype=obj \ # RUN: | llvm-objdump -d - \ # RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s -# RUN: llvm-mc %s --triple=loongarch64 --mattr=+d --filetype=obj \ +# RUN: llvm-mc %s --triple=loongarch64 --filetype=obj \ # RUN: | llvm-objdump -d - \ # RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s diff --git a/llvm/test/MC/LoongArch/Basic/Float/d-comp.s b/llvm/test/MC/LoongArch/Basic/Float/d-comp.s index 07f3b6276017f16d7a40cce61d3b7f2e9be4cde3..19abebd9d817b91482230d623c2ebdb4df2ddd76 100644 --- a/llvm/test/MC/LoongArch/Basic/Float/d-comp.s +++ b/llvm/test/MC/LoongArch/Basic/Float/d-comp.s @@ -1,11 +1,11 @@ -# RUN: llvm-mc %s --triple=loongarch32 --mattr=+d --show-encoding \ +# RUN: llvm-mc %s --triple=loongarch32 --show-encoding \ # RUN: | FileCheck --check-prefixes=ASM-AND-OBJ,ASM %s -# RUN: llvm-mc %s --triple=loongarch64 --mattr=+d --show-encoding \ +# RUN: llvm-mc %s --triple=loongarch64 --show-encoding \ # RUN: | FileCheck --check-prefixes=ASM-AND-OBJ,ASM %s -# RUN: llvm-mc %s --triple=loongarch32 --mattr=+d --filetype=obj \ +# RUN: llvm-mc %s --triple=loongarch32 --filetype=obj \ # RUN: | llvm-objdump -d - \ # RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s -# RUN: llvm-mc %s --triple=loongarch64 --mattr=+d --filetype=obj \ +# RUN: llvm-mc %s --triple=loongarch64 --filetype=obj \ # RUN: | llvm-objdump -d - \ # RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s diff --git a/llvm/test/MC/LoongArch/Basic/Float/d-conv.s b/llvm/test/MC/LoongArch/Basic/Float/d-conv.s index 10dd822a4c922ef915e7f6c1bec8f7353a808cc6..2d6eae98d53814aa91b9254531989721ffd1b546 100644 --- a/llvm/test/MC/LoongArch/Basic/Float/d-conv.s +++ b/llvm/test/MC/LoongArch/Basic/Float/d-conv.s @@ -1,11 +1,11 @@ -# RUN: llvm-mc %s --triple=loongarch32 --mattr=+d --show-encoding \ +# RUN: llvm-mc %s --triple=loongarch32 --show-encoding \ # RUN: | FileCheck --check-prefixes=ASM-AND-OBJ,ASM %s -# RUN: llvm-mc %s --triple=loongarch64 --mattr=+d --show-encoding \ +# RUN: llvm-mc %s --triple=loongarch64 --show-encoding \ # RUN: | FileCheck --check-prefixes=ASM-AND-OBJ,ASM %s -# RUN: llvm-mc %s --triple=loongarch32 --mattr=+d --filetype=obj \ +# RUN: llvm-mc %s 
--triple=loongarch32 --filetype=obj \ # RUN: | llvm-objdump -d - \ # RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s -# RUN: llvm-mc %s --triple=loongarch64 --mattr=+d --filetype=obj \ +# RUN: llvm-mc %s --triple=loongarch64 --filetype=obj \ # RUN: | llvm-objdump -d - \ # RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s diff --git a/llvm/test/MC/LoongArch/Basic/Float/d-invalid.s b/llvm/test/MC/LoongArch/Basic/Float/d-invalid.s index b38a4461afcb4b1c75b0b5778e9b2f9e52fe04d8..255fd839e08a3005ef94c4b2f499ed132338d6e8 100644 --- a/llvm/test/MC/LoongArch/Basic/Float/d-invalid.s +++ b/llvm/test/MC/LoongArch/Basic/Float/d-invalid.s @@ -1,4 +1,4 @@ -# RUN: not llvm-mc --triple=loongarch32 -mattr=+d %s 2>&1 | FileCheck %s +# RUN: not llvm-mc --triple=loongarch32 %s 2>&1 | FileCheck %s # CHECK: :[[#@LINE+1]]:1: error: instruction requires the following: LA64 Basic Integer and Privilege Instruction Set movgr2fr.d $fa0, $a0 diff --git a/llvm/test/MC/LoongArch/Basic/Float/d-memory.s b/llvm/test/MC/LoongArch/Basic/Float/d-memory.s index 4cb7e6fe951c1d6eee860910843ddb07b2cb515b..bce4563de86f061ca9609ab2fbb81faba45972af 100644 --- a/llvm/test/MC/LoongArch/Basic/Float/d-memory.s +++ b/llvm/test/MC/LoongArch/Basic/Float/d-memory.s @@ -1,11 +1,11 @@ -# RUN: llvm-mc %s --triple=loongarch32 --mattr=+d --show-encoding \ +# RUN: llvm-mc %s --triple=loongarch32 --show-encoding \ # RUN: | FileCheck --check-prefixes=ASM-AND-OBJ,ASM %s -# RUN: llvm-mc %s --triple=loongarch64 --mattr=+d --show-encoding \ +# RUN: llvm-mc %s --triple=loongarch64 --show-encoding \ # RUN: | FileCheck --check-prefixes=ASM-AND-OBJ,ASM %s -# RUN: llvm-mc %s --triple=loongarch32 --mattr=+d --filetype=obj \ +# RUN: llvm-mc %s --triple=loongarch32 --filetype=obj \ # RUN: | llvm-objdump -d - \ # RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s -# RUN: llvm-mc %s --triple=loongarch64 --mattr=+d --filetype=obj \ +# RUN: llvm-mc %s --triple=loongarch64 --filetype=obj \ # RUN: | llvm-objdump -d - \ # RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s diff --git a/llvm/test/MC/LoongArch/Basic/Float/d-move.s b/llvm/test/MC/LoongArch/Basic/Float/d-move.s index c5d4b6a5fda97fa94a0bf1e799af27371a7636b4..afe8a51ec1b64a0248a8d2fa74aed885877c951e 100644 --- a/llvm/test/MC/LoongArch/Basic/Float/d-move.s +++ b/llvm/test/MC/LoongArch/Basic/Float/d-move.s @@ -1,11 +1,11 @@ -# RUN: llvm-mc %s --triple=loongarch32 --mattr=+d --show-encoding \ +# RUN: llvm-mc %s --triple=loongarch32 --show-encoding \ # RUN: | FileCheck --check-prefixes=ASM-AND-OBJ,ASM %s -# RUN: llvm-mc %s --triple=loongarch64 --mattr=+d --show-encoding --defsym=LA64=1 \ +# RUN: llvm-mc %s --triple=loongarch64 --show-encoding --defsym=LA64=1 \ # RUN: | FileCheck --check-prefixes=ASM-AND-OBJ,ASM,ASM-AND-OBJ64,ASM64 %s -# RUN: llvm-mc %s --triple=loongarch32 --mattr=+d --filetype=obj \ +# RUN: llvm-mc %s --triple=loongarch32 --filetype=obj \ # RUN: | llvm-objdump -d - \ # RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s -# RUN: llvm-mc %s --triple=loongarch64 --mattr=+d --filetype=obj --defsym=LA64=1 \ +# RUN: llvm-mc %s --triple=loongarch64 --filetype=obj --defsym=LA64=1 \ # RUN: | llvm-objdump -d - \ # RUN: | FileCheck --check-prefixes=ASM-AND-OBJ,ASM-AND-OBJ64 %s diff --git a/llvm/test/MC/LoongArch/Basic/Float/f-arith.s b/llvm/test/MC/LoongArch/Basic/Float/f-arith.s index 5865d6b6e152f74a1e43c3661095310fa09cecb1..155e783cf4350526cc0bf9056bdf1580b19a2b67 100644 --- a/llvm/test/MC/LoongArch/Basic/Float/f-arith.s +++ b/llvm/test/MC/LoongArch/Basic/Float/f-arith.s @@ -1,11 +1,11 @@ -# RUN: llvm-mc %s 
--triple=loongarch32 --mattr=+f --show-encoding \ +# RUN: llvm-mc %s --triple=loongarch32 --show-encoding \ # RUN: | FileCheck --check-prefixes=ASM-AND-OBJ,ASM %s -# RUN: llvm-mc %s --triple=loongarch64 --mattr=+f --show-encoding \ +# RUN: llvm-mc %s --triple=loongarch64 --show-encoding \ # RUN: | FileCheck --check-prefixes=ASM-AND-OBJ,ASM %s -# RUN: llvm-mc %s --triple=loongarch32 --mattr=+f --filetype=obj \ +# RUN: llvm-mc %s --triple=loongarch32 --filetype=obj \ # RUN: | llvm-objdump -d - \ # RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s -# RUN: llvm-mc %s --triple=loongarch64 --mattr=+f --filetype=obj \ +# RUN: llvm-mc %s --triple=loongarch64 --filetype=obj \ # RUN: | llvm-objdump -d - \ # RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s diff --git a/llvm/test/MC/LoongArch/Basic/Float/f-bound-check.s b/llvm/test/MC/LoongArch/Basic/Float/f-bound-check.s index cdfb67b52af0c10db44f9d20c0a74f58bce43275..4b56bebf3aa6d7489afd46a5de0879eacba20878 100644 --- a/llvm/test/MC/LoongArch/Basic/Float/f-bound-check.s +++ b/llvm/test/MC/LoongArch/Basic/Float/f-bound-check.s @@ -1,11 +1,11 @@ -# RUN: llvm-mc %s --triple=loongarch32 --mattr=+f --show-encoding \ +# RUN: llvm-mc %s --triple=loongarch32 --show-encoding \ # RUN: | FileCheck --check-prefixes=ASM-AND-OBJ,ASM %s -# RUN: llvm-mc %s --triple=loongarch64 --mattr=+f --show-encoding \ +# RUN: llvm-mc %s --triple=loongarch64 --show-encoding \ # RUN: | FileCheck --check-prefixes=ASM-AND-OBJ,ASM %s -# RUN: llvm-mc %s --triple=loongarch32 --mattr=+f --filetype=obj \ +# RUN: llvm-mc %s --triple=loongarch32 --filetype=obj \ # RUN: | llvm-objdump -d - \ # RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s -# RUN: llvm-mc %s --triple=loongarch64 --mattr=+f --filetype=obj \ +# RUN: llvm-mc %s --triple=loongarch64 --filetype=obj \ # RUN: | llvm-objdump -d - \ # RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s diff --git a/llvm/test/MC/LoongArch/Basic/Float/f-branch.s b/llvm/test/MC/LoongArch/Basic/Float/f-branch.s index 656808f60f4f6d1b689ef17ba009e2f8623c2e9b..2ac63cdc7c9b53421957e9d3d257622f87afd3c0 100644 --- a/llvm/test/MC/LoongArch/Basic/Float/f-branch.s +++ b/llvm/test/MC/LoongArch/Basic/Float/f-branch.s @@ -1,11 +1,11 @@ -# RUN: llvm-mc %s --triple=loongarch32 --mattr=+f --show-encoding \ +# RUN: llvm-mc %s --triple=loongarch32 --show-encoding \ # RUN: | FileCheck --check-prefixes=ASM-AND-OBJ,ASM %s -# RUN: llvm-mc %s --triple=loongarch64 --mattr=+f --show-encoding \ +# RUN: llvm-mc %s --triple=loongarch64 --show-encoding \ # RUN: | FileCheck --check-prefixes=ASM-AND-OBJ,ASM %s -# RUN: llvm-mc %s --triple=loongarch32 --mattr=+f --filetype=obj \ +# RUN: llvm-mc %s --triple=loongarch32 --filetype=obj \ # RUN: | llvm-objdump -d - \ # RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s -# RUN: llvm-mc %s --triple=loongarch64 --mattr=+f --filetype=obj \ +# RUN: llvm-mc %s --triple=loongarch64 --filetype=obj \ # RUN: | llvm-objdump -d - \ # RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s diff --git a/llvm/test/MC/LoongArch/Basic/Float/f-comp.s b/llvm/test/MC/LoongArch/Basic/Float/f-comp.s index 8ba38426d3aa7ef7e22369567368d61615700549..221bea21ca13d5407f49d12e2ff07bd94cf24ca2 100644 --- a/llvm/test/MC/LoongArch/Basic/Float/f-comp.s +++ b/llvm/test/MC/LoongArch/Basic/Float/f-comp.s @@ -1,11 +1,11 @@ -# RUN: llvm-mc %s --triple=loongarch32 --mattr=+f --show-encoding \ +# RUN: llvm-mc %s --triple=loongarch32 --show-encoding \ # RUN: | FileCheck --check-prefixes=ASM-AND-OBJ,ASM %s -# RUN: llvm-mc %s --triple=loongarch64 --mattr=+f --show-encoding \ +# RUN: llvm-mc %s 
--triple=loongarch64 --show-encoding \ # RUN: | FileCheck --check-prefixes=ASM-AND-OBJ,ASM %s -# RUN: llvm-mc %s --triple=loongarch32 --mattr=+f --filetype=obj \ +# RUN: llvm-mc %s --triple=loongarch32 --filetype=obj \ # RUN: | llvm-objdump -d - \ # RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s -# RUN: llvm-mc %s --triple=loongarch64 --mattr=+f --filetype=obj \ +# RUN: llvm-mc %s --triple=loongarch64 --filetype=obj \ # RUN: | llvm-objdump -d - \ # RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s diff --git a/llvm/test/MC/LoongArch/Basic/Float/f-conv.s b/llvm/test/MC/LoongArch/Basic/Float/f-conv.s index 069dab10c25a0f776f5d8c90d9dabff7f4355760..b68c472cdbf11cfed43b5709656f79579167f46c 100644 --- a/llvm/test/MC/LoongArch/Basic/Float/f-conv.s +++ b/llvm/test/MC/LoongArch/Basic/Float/f-conv.s @@ -1,11 +1,11 @@ -# RUN: llvm-mc %s --triple=loongarch32 --mattr=+f --show-encoding \ +# RUN: llvm-mc %s --triple=loongarch32 --show-encoding \ # RUN: | FileCheck --check-prefixes=ASM-AND-OBJ,ASM %s -# RUN: llvm-mc %s --triple=loongarch64 --mattr=+f --show-encoding \ +# RUN: llvm-mc %s --triple=loongarch64 --show-encoding \ # RUN: | FileCheck --check-prefixes=ASM-AND-OBJ,ASM %s -# RUN: llvm-mc %s --triple=loongarch32 --mattr=+f --filetype=obj \ +# RUN: llvm-mc %s --triple=loongarch32 --filetype=obj \ # RUN: | llvm-objdump -d - \ # RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s -# RUN: llvm-mc %s --triple=loongarch64 --mattr=+f --filetype=obj \ +# RUN: llvm-mc %s --triple=loongarch64 --filetype=obj \ # RUN: | llvm-objdump -d - \ # RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s diff --git a/llvm/test/MC/LoongArch/Basic/Float/f-invalid.s b/llvm/test/MC/LoongArch/Basic/Float/f-invalid.s deleted file mode 100644 index 2ab91b3f1a777bdc7007da43e9f8685592d93308..0000000000000000000000000000000000000000 --- a/llvm/test/MC/LoongArch/Basic/Float/f-invalid.s +++ /dev/null @@ -1,4 +0,0 @@ -# RUN: not llvm-mc --triple=loongarch32 -mattr=+f %s 2>&1 | FileCheck %s - -# CHECK: :[[#@LINE+1]]:1: error: instruction requires the following: 'D' (Double-Precision Floating-Point) -fadd.d $fa0, $fa0, $fa0 diff --git a/llvm/test/MC/LoongArch/Basic/Float/f-memory.s b/llvm/test/MC/LoongArch/Basic/Float/f-memory.s index a614e867e1d921a737a930be3be66c653cafc9cb..935deb3ebc2323fafdd05df00e81766d832bef0d 100644 --- a/llvm/test/MC/LoongArch/Basic/Float/f-memory.s +++ b/llvm/test/MC/LoongArch/Basic/Float/f-memory.s @@ -1,11 +1,11 @@ -# RUN: llvm-mc %s --triple=loongarch32 --mattr=+f --show-encoding \ +# RUN: llvm-mc %s --triple=loongarch32 --show-encoding \ # RUN: | FileCheck --check-prefixes=ASM-AND-OBJ,ASM %s -# RUN: llvm-mc %s --triple=loongarch64 --mattr=+f --show-encoding \ +# RUN: llvm-mc %s --triple=loongarch64 --show-encoding \ # RUN: | FileCheck --check-prefixes=ASM-AND-OBJ,ASM %s -# RUN: llvm-mc %s --triple=loongarch32 --mattr=+f --filetype=obj \ +# RUN: llvm-mc %s --triple=loongarch32 --filetype=obj \ # RUN: | llvm-objdump -d - \ # RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s -# RUN: llvm-mc %s --triple=loongarch64 --mattr=+f --filetype=obj \ +# RUN: llvm-mc %s --triple=loongarch64 --filetype=obj \ # RUN: | llvm-objdump -d - \ # RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s diff --git a/llvm/test/MC/LoongArch/Basic/Float/f-move.s b/llvm/test/MC/LoongArch/Basic/Float/f-move.s index 26702d60b68a05e7e755f0a29abda8b190d1ba37..ed6d68f438a73a0ba9e6a2ad04dd2f757b2c329c 100644 --- a/llvm/test/MC/LoongArch/Basic/Float/f-move.s +++ b/llvm/test/MC/LoongArch/Basic/Float/f-move.s @@ -1,11 +1,11 @@ -# RUN: llvm-mc %s --triple=loongarch32 
--mattr=+f --show-encoding \ +# RUN: llvm-mc %s --triple=loongarch32 --show-encoding \ # RUN: | FileCheck --check-prefixes=ASM-AND-OBJ,ASM %s -# RUN: llvm-mc %s --triple=loongarch64 --mattr=+f --show-encoding \ +# RUN: llvm-mc %s --triple=loongarch64 --show-encoding \ # RUN: | FileCheck --check-prefixes=ASM-AND-OBJ,ASM %s -# RUN: llvm-mc %s --triple=loongarch32 --mattr=+f --filetype=obj \ +# RUN: llvm-mc %s --triple=loongarch32 --filetype=obj \ # RUN: | llvm-objdump -d - \ # RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s -# RUN: llvm-mc %s --triple=loongarch64 --mattr=+f --filetype=obj \ +# RUN: llvm-mc %s --triple=loongarch64 --filetype=obj \ # RUN: | llvm-objdump -d - \ # RUN: | FileCheck --check-prefix=ASM-AND-OBJ %s diff --git a/llvm/test/MC/LoongArch/Basic/Integer/invalid.s b/llvm/test/MC/LoongArch/Basic/Integer/invalid.s index b226d8c9b7d24f2273873bc287760f5d7cdb73de..958d5cab6f2f316157cdb5348dd2e71c17eaf4c8 100644 --- a/llvm/test/MC/LoongArch/Basic/Integer/invalid.s +++ b/llvm/test/MC/LoongArch/Basic/Integer/invalid.s @@ -1,9 +1,7 @@ ## Test invalid instructions on both loongarch32 and loongarch64 target. -# RUN: not llvm-mc --triple=loongarch32 --mattr=-f %s 2>&1 \ -# RUN: | FileCheck %s --check-prefixes=CHECK,CHECK64 -# RUN: not llvm-mc --triple=loongarch64 --mattr=-f %s 2>&1 --defsym=LA64=1 \ -# RUN: | FileCheck %s +# RUN: not llvm-mc --triple=loongarch32 %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK64 +# RUN: not llvm-mc --triple=loongarch64 %s 2>&1 --defsym=LA64=1 | FileCheck %s ## Out of range immediates ## uimm2 @@ -181,12 +179,6 @@ andi $a0, $a0 ## Instructions outside the base integer ISA ## TODO: Test instructions in LSX/LASX/LBT/LVZ after their introduction. -## Floating-Point mnemonics -fadd.s $fa0, $fa0, $fa0 -# CHECK: :[[#@LINE-1]]:1: error: instruction requires the following: 'F' (Single-Precision Floating-Point) -fadd.d $fa0, $fa0, $fa0 -# CHECK: :[[#@LINE-1]]:1: error: instruction requires the following: 'D' (Double-Precision Floating-Point) - ## Using floating point registers when integer registers are expected sll.w $a0, $a0, $fa0 # CHECK: :[[#@LINE-1]]:18: error: invalid operand for instruction diff --git a/llvm/test/MC/LoongArch/Basic/Integer/invalid64.s b/llvm/test/MC/LoongArch/Basic/Integer/invalid64.s index acddca9432a698aa30f7bff95dd60b7417edd72f..1c1c658ad440f83141b32e3290a34f86f2f0fc6f 100644 --- a/llvm/test/MC/LoongArch/Basic/Integer/invalid64.s +++ b/llvm/test/MC/LoongArch/Basic/Integer/invalid64.s @@ -65,7 +65,7 @@ addu16i.d $a0, $a0, 32768 ## simm20 pcaddu18i $a0, 0x80000 -# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-524288, 524287] +# CHECK: :[[#@LINE-1]]:16: error: operand must be a symbol with modifier (e.g. 
%call36) or an integer in the range [-524288, 524287] ## simm20_lu32id lu32i.d $a0, 0x80000 diff --git a/llvm/test/MC/LoongArch/Macros/macros-call.s b/llvm/test/MC/LoongArch/Macros/macros-call.s new file mode 100644 index 0000000000000000000000000000000000000000..a648a397803817943485aecda02d5e90ffad9543 --- /dev/null +++ b/llvm/test/MC/LoongArch/Macros/macros-call.s @@ -0,0 +1,9 @@ +# RUN: llvm-mc --triple=loongarch64 %s | FileCheck %s + +call36 sym_call +# CHECK: pcaddu18i $ra, %call36(sym_call) +# CHECK-NEXT: jirl $ra, $ra, 0 + +tail36 $t0, sym_tail +# CHECK: pcaddu18i $t0, %call36(sym_tail) +# CHECK-NEXT: jr $t0 diff --git a/llvm/test/MC/LoongArch/Relocations/relocations.s b/llvm/test/MC/LoongArch/Relocations/relocations.s index 042cc93470a1e5b1b72b55814e0d78529bc0bbe1..bec71e103893331e0c3392133442f25fa986064a 100644 --- a/llvm/test/MC/LoongArch/Relocations/relocations.s +++ b/llvm/test/MC/LoongArch/Relocations/relocations.s @@ -218,3 +218,8 @@ lu12i.w $t1, %gd_hi20(foo) # RELOC: R_LARCH_TLS_GD_HI20 foo 0x0 # INSTR: lu12i.w $t1, %gd_hi20(foo) # FIXUP: fixup A - offset: 0, value: %gd_hi20(foo), kind: FK_NONE + +pcaddu18i $t1, %call36(foo) +# RELOC: R_LARCH_CALL36 foo 0x0 +# INSTR: pcaddu18i $t1, %call36(foo) +# FIXUP: fixup A - offset: 0, value: %call36(foo), kind: FK_NONE diff --git a/llvm/test/MC/LoongArch/lasx/absd.s b/llvm/test/MC/LoongArch/lasx/absd.s new file mode 100644 index 0000000000000000000000000000000000000000..9ac1bece8780b07f65de5783fdf8bc594ed3ac15 --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/absd.s @@ -0,0 +1,36 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvabsd.b $xr22, $xr1, $xr17 +# CHECK-INST: xvabsd.b $xr22, $xr1, $xr17 +# CHECK-ENCODING: encoding: [0x36,0x44,0x60,0x74] + +xvabsd.h $xr17, $xr24, $xr9 +# CHECK-INST: xvabsd.h $xr17, $xr24, $xr9 +# CHECK-ENCODING: encoding: [0x11,0xa7,0x60,0x74] + +xvabsd.w $xr28, $xr9, $xr29 +# CHECK-INST: xvabsd.w $xr28, $xr9, $xr29 +# CHECK-ENCODING: encoding: [0x3c,0x75,0x61,0x74] + +xvabsd.d $xr30, $xr23, $xr19 +# CHECK-INST: xvabsd.d $xr30, $xr23, $xr19 +# CHECK-ENCODING: encoding: [0xfe,0xce,0x61,0x74] + +xvabsd.bu $xr16, $xr4, $xr15 +# CHECK-INST: xvabsd.bu $xr16, $xr4, $xr15 +# CHECK-ENCODING: encoding: [0x90,0x3c,0x62,0x74] + +xvabsd.hu $xr13, $xr23, $xr27 +# CHECK-INST: xvabsd.hu $xr13, $xr23, $xr27 +# CHECK-ENCODING: encoding: [0xed,0xee,0x62,0x74] + +xvabsd.wu $xr31, $xr18, $xr15 +# CHECK-INST: xvabsd.wu $xr31, $xr18, $xr15 +# CHECK-ENCODING: encoding: [0x5f,0x3e,0x63,0x74] + +xvabsd.du $xr26, $xr10, $xr4 +# CHECK-INST: xvabsd.du $xr26, $xr10, $xr4 +# CHECK-ENCODING: encoding: [0x5a,0x91,0x63,0x74] diff --git a/llvm/test/MC/LoongArch/lasx/add.s b/llvm/test/MC/LoongArch/lasx/add.s new file mode 100644 index 0000000000000000000000000000000000000000..b7f92ef826ec9caf2c9d7e23200b743bf228112b --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/add.s @@ -0,0 +1,24 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvadd.b $xr20, $xr19, $xr5 +# CHECK-INST: xvadd.b $xr20, $xr19, $xr5 +# CHECK-ENCODING: encoding: [0x74,0x16,0x0a,0x74] + +xvadd.h $xr24, $xr7, $xr14 +# CHECK-INST: xvadd.h $xr24, $xr7, $xr14 +# CHECK-ENCODING: 
encoding: [0xf8,0xb8,0x0a,0x74] + +xvadd.w $xr19, $xr1, $xr21 +# CHECK-INST: xvadd.w $xr19, $xr1, $xr21 +# CHECK-ENCODING: encoding: [0x33,0x54,0x0b,0x74] + +xvadd.d $xr19, $xr6, $xr13 +# CHECK-INST: xvadd.d $xr19, $xr6, $xr13 +# CHECK-ENCODING: encoding: [0xd3,0xb4,0x0b,0x74] + +xvadd.q $xr4, $xr28, $xr6 +# CHECK-INST: xvadd.q $xr4, $xr28, $xr6 +# CHECK-ENCODING: encoding: [0x84,0x1b,0x2d,0x75] diff --git a/llvm/test/MC/LoongArch/lasx/adda.s b/llvm/test/MC/LoongArch/lasx/adda.s new file mode 100644 index 0000000000000000000000000000000000000000..849c560173b6cf3ad78e12da5276eec47e824f27 --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/adda.s @@ -0,0 +1,20 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvadda.b $xr10, $xr24, $xr27 +# CHECK-INST: xvadda.b $xr10, $xr24, $xr27 +# CHECK-ENCODING: encoding: [0x0a,0x6f,0x5c,0x74] + +xvadda.h $xr0, $xr28, $xr29 +# CHECK-INST: xvadda.h $xr0, $xr28, $xr29 +# CHECK-ENCODING: encoding: [0x80,0xf7,0x5c,0x74] + +xvadda.w $xr31, $xr9, $xr9 +# CHECK-INST: xvadda.w $xr31, $xr9, $xr9 +# CHECK-ENCODING: encoding: [0x3f,0x25,0x5d,0x74] + +xvadda.d $xr10, $xr1, $xr25 +# CHECK-INST: xvadda.d $xr10, $xr1, $xr25 +# CHECK-ENCODING: encoding: [0x2a,0xe4,0x5d,0x74] diff --git a/llvm/test/MC/LoongArch/lasx/addi.s b/llvm/test/MC/LoongArch/lasx/addi.s new file mode 100644 index 0000000000000000000000000000000000000000..0bd5cd96562643b8049a7c86b67b3483f93f63ed --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/addi.s @@ -0,0 +1,20 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvaddi.bu $xr1, $xr22, 2 +# CHECK-INST: xvaddi.bu $xr1, $xr22, 2 +# CHECK-ENCODING: encoding: [0xc1,0x0a,0x8a,0x76] + +xvaddi.hu $xr3, $xr10, 29 +# CHECK-INST: xvaddi.hu $xr3, $xr10, 29 +# CHECK-ENCODING: encoding: [0x43,0xf5,0x8a,0x76] + +xvaddi.wu $xr5, $xr11, 3 +# CHECK-INST: xvaddi.wu $xr5, $xr11, 3 +# CHECK-ENCODING: encoding: [0x65,0x0d,0x8b,0x76] + +xvaddi.du $xr6, $xr0, 7 +# CHECK-INST: xvaddi.du $xr6, $xr0, 7 +# CHECK-ENCODING: encoding: [0x06,0x9c,0x8b,0x76] diff --git a/llvm/test/MC/LoongArch/lasx/addw.s b/llvm/test/MC/LoongArch/lasx/addw.s new file mode 100644 index 0000000000000000000000000000000000000000..c9fb22a70f09f723b137157b33684152b08173b2 --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/addw.s @@ -0,0 +1,100 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvaddwev.h.b $xr23, $xr30, $xr4 +# CHECK-INST: xvaddwev.h.b $xr23, $xr30, $xr4 +# CHECK-ENCODING: encoding: [0xd7,0x13,0x1e,0x74] + +xvaddwev.w.h $xr20, $xr19, $xr31 +# CHECK-INST: xvaddwev.w.h $xr20, $xr19, $xr31 +# CHECK-ENCODING: encoding: [0x74,0xfe,0x1e,0x74] + +xvaddwev.d.w $xr8, $xr9, $xr25 +# CHECK-INST: xvaddwev.d.w $xr8, $xr9, $xr25 +# CHECK-ENCODING: encoding: [0x28,0x65,0x1f,0x74] + +xvaddwev.q.d $xr29, $xr22, $xr29 +# CHECK-INST: xvaddwev.q.d $xr29, $xr22, $xr29 +# CHECK-ENCODING: encoding: [0xdd,0xf6,0x1f,0x74] + +xvaddwev.h.bu $xr30, $xr13, $xr26 +# CHECK-INST: xvaddwev.h.bu $xr30, $xr13, $xr26 +# 
CHECK-ENCODING: encoding: [0xbe,0x69,0x2e,0x74] + +xvaddwev.w.hu $xr15, $xr31, $xr16 +# CHECK-INST: xvaddwev.w.hu $xr15, $xr31, $xr16 +# CHECK-ENCODING: encoding: [0xef,0xc3,0x2e,0x74] + +xvaddwev.d.wu $xr16, $xr16, $xr20 +# CHECK-INST: xvaddwev.d.wu $xr16, $xr16, $xr20 +# CHECK-ENCODING: encoding: [0x10,0x52,0x2f,0x74] + +xvaddwev.q.du $xr10, $xr18, $xr18 +# CHECK-INST: xvaddwev.q.du $xr10, $xr18, $xr18 +# CHECK-ENCODING: encoding: [0x4a,0xca,0x2f,0x74] + +xvaddwev.h.bu.b $xr3, $xr7, $xr9 +# CHECK-INST: xvaddwev.h.bu.b $xr3, $xr7, $xr9 +# CHECK-ENCODING: encoding: [0xe3,0x24,0x3e,0x74] + +xvaddwev.w.hu.h $xr26, $xr16, $xr27 +# CHECK-INST: xvaddwev.w.hu.h $xr26, $xr16, $xr27 +# CHECK-ENCODING: encoding: [0x1a,0xee,0x3e,0x74] + +xvaddwev.d.wu.w $xr0, $xr13, $xr8 +# CHECK-INST: xvaddwev.d.wu.w $xr0, $xr13, $xr8 +# CHECK-ENCODING: encoding: [0xa0,0x21,0x3f,0x74] + +xvaddwev.q.du.d $xr19, $xr10, $xr3 +# CHECK-INST: xvaddwev.q.du.d $xr19, $xr10, $xr3 +# CHECK-ENCODING: encoding: [0x53,0x8d,0x3f,0x74] + +xvaddwod.h.b $xr14, $xr21, $xr24 +# CHECK-INST: xvaddwod.h.b $xr14, $xr21, $xr24 +# CHECK-ENCODING: encoding: [0xae,0x62,0x22,0x74] + +xvaddwod.w.h $xr19, $xr26, $xr23 +# CHECK-INST: xvaddwod.w.h $xr19, $xr26, $xr23 +# CHECK-ENCODING: encoding: [0x53,0xdf,0x22,0x74] + +xvaddwod.d.w $xr12, $xr9, $xr20 +# CHECK-INST: xvaddwod.d.w $xr12, $xr9, $xr20 +# CHECK-ENCODING: encoding: [0x2c,0x51,0x23,0x74] + +xvaddwod.q.d $xr11, $xr2, $xr8 +# CHECK-INST: xvaddwod.q.d $xr11, $xr2, $xr8 +# CHECK-ENCODING: encoding: [0x4b,0xa0,0x23,0x74] + +xvaddwod.h.bu $xr6, $xr6, $xr9 +# CHECK-INST: xvaddwod.h.bu $xr6, $xr6, $xr9 +# CHECK-ENCODING: encoding: [0xc6,0x24,0x32,0x74] + +xvaddwod.w.hu $xr1, $xr27, $xr25 +# CHECK-INST: xvaddwod.w.hu $xr1, $xr27, $xr25 +# CHECK-ENCODING: encoding: [0x61,0xe7,0x32,0x74] + +xvaddwod.d.wu $xr26, $xr19, $xr11 +# CHECK-INST: xvaddwod.d.wu $xr26, $xr19, $xr11 +# CHECK-ENCODING: encoding: [0x7a,0x2e,0x33,0x74] + +xvaddwod.q.du $xr21, $xr22, $xr8 +# CHECK-INST: xvaddwod.q.du $xr21, $xr22, $xr8 +# CHECK-ENCODING: encoding: [0xd5,0xa2,0x33,0x74] + +xvaddwod.h.bu.b $xr21, $xr26, $xr24 +# CHECK-INST: xvaddwod.h.bu.b $xr21, $xr26, $xr24 +# CHECK-ENCODING: encoding: [0x55,0x63,0x40,0x74] + +xvaddwod.w.hu.h $xr31, $xr6, $xr16 +# CHECK-INST: xvaddwod.w.hu.h $xr31, $xr6, $xr16 +# CHECK-ENCODING: encoding: [0xdf,0xc0,0x40,0x74] + +xvaddwod.d.wu.w $xr12, $xr28, $xr31 +# CHECK-INST: xvaddwod.d.wu.w $xr12, $xr28, $xr31 +# CHECK-ENCODING: encoding: [0x8c,0x7f,0x41,0x74] + +xvaddwod.q.du.d $xr29, $xr4, $xr12 +# CHECK-INST: xvaddwod.q.du.d $xr29, $xr4, $xr12 +# CHECK-ENCODING: encoding: [0x9d,0xb0,0x41,0x74] diff --git a/llvm/test/MC/LoongArch/lasx/and.s b/llvm/test/MC/LoongArch/lasx/and.s new file mode 100644 index 0000000000000000000000000000000000000000..8c865d3424e0878f1c6afc3769c0db81de6f3599 --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/and.s @@ -0,0 +1,8 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvand.v $xr14, $xr23, $xr19 +# CHECK-INST: xvand.v $xr14, $xr23, $xr19 +# CHECK-ENCODING: encoding: [0xee,0x4e,0x26,0x75] diff --git a/llvm/test/MC/LoongArch/lasx/andi.s b/llvm/test/MC/LoongArch/lasx/andi.s new file mode 100644 index 0000000000000000000000000000000000000000..6f198dcfd9e5bd2f500e4746ec291bc2c45139a0 --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/andi.s @@ -0,0 +1,8 @@ +# RUN: 
llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvandi.b $xr11, $xr7, 66 +# CHECK-INST: xvandi.b $xr11, $xr7, 66 +# CHECK-ENCODING: encoding: [0xeb,0x08,0xd1,0x77] diff --git a/llvm/test/MC/LoongArch/lasx/andn.s b/llvm/test/MC/LoongArch/lasx/andn.s new file mode 100644 index 0000000000000000000000000000000000000000..815ecc3732042e295f268dd6c7d5a69ca363a51c --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/andn.s @@ -0,0 +1,8 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvandn.v $xr3, $xr15, $xr3 +# CHECK-INST: xvandn.v $xr3, $xr15, $xr3 +# CHECK-ENCODING: encoding: [0xe3,0x0d,0x28,0x75] diff --git a/llvm/test/MC/LoongArch/lasx/avg.s b/llvm/test/MC/LoongArch/lasx/avg.s new file mode 100644 index 0000000000000000000000000000000000000000..cdb2bd8cfde1153589142a53c0e51afa756122cb --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/avg.s @@ -0,0 +1,36 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvavg.b $xr5, $xr30, $xr21 +# CHECK-INST: xvavg.b $xr5, $xr30, $xr21 +# CHECK-ENCODING: encoding: [0xc5,0x57,0x64,0x74] + +xvavg.h $xr18, $xr17, $xr21 +# CHECK-INST: xvavg.h $xr18, $xr17, $xr21 +# CHECK-ENCODING: encoding: [0x32,0xd6,0x64,0x74] + +xvavg.w $xr3, $xr23, $xr20 +# CHECK-INST: xvavg.w $xr3, $xr23, $xr20 +# CHECK-ENCODING: encoding: [0xe3,0x52,0x65,0x74] + +xvavg.d $xr27, $xr0, $xr27 +# CHECK-INST: xvavg.d $xr27, $xr0, $xr27 +# CHECK-ENCODING: encoding: [0x1b,0xec,0x65,0x74] + +xvavg.bu $xr11, $xr4, $xr16 +# CHECK-INST: xvavg.bu $xr11, $xr4, $xr16 +# CHECK-ENCODING: encoding: [0x8b,0x40,0x66,0x74] + +xvavg.hu $xr2, $xr1, $xr19 +# CHECK-INST: xvavg.hu $xr2, $xr1, $xr19 +# CHECK-ENCODING: encoding: [0x22,0xcc,0x66,0x74] + +xvavg.wu $xr27, $xr20, $xr27 +# CHECK-INST: xvavg.wu $xr27, $xr20, $xr27 +# CHECK-ENCODING: encoding: [0x9b,0x6e,0x67,0x74] + +xvavg.du $xr23, $xr20, $xr29 +# CHECK-INST: xvavg.du $xr23, $xr20, $xr29 +# CHECK-ENCODING: encoding: [0x97,0xf6,0x67,0x74] diff --git a/llvm/test/MC/LoongArch/lasx/avgr.s b/llvm/test/MC/LoongArch/lasx/avgr.s new file mode 100644 index 0000000000000000000000000000000000000000..f28a48d249ef17afa22e200b9ecb895e441fb2f1 --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/avgr.s @@ -0,0 +1,36 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvavgr.b $xr29, $xr15, $xr7 +# CHECK-INST: xvavgr.b $xr29, $xr15, $xr7 +# CHECK-ENCODING: encoding: [0xfd,0x1d,0x68,0x74] + +xvavgr.h $xr0, $xr26, $xr15 +# CHECK-INST: xvavgr.h $xr0, $xr26, $xr15 +# CHECK-ENCODING: encoding: [0x40,0xbf,0x68,0x74] + +xvavgr.w $xr23, $xr0, $xr0 +# CHECK-INST: xvavgr.w $xr23, $xr0, $xr0 +# CHECK-ENCODING: encoding: [0x17,0x00,0x69,0x74] + +xvavgr.d $xr29, $xr23, $xr0 +# CHECK-INST: xvavgr.d $xr29, $xr23, $xr0 +# CHECK-ENCODING: encoding: [0xfd,0x82,0x69,0x74] + +xvavgr.bu $xr22, $xr2, 
$xr25 +# CHECK-INST: xvavgr.bu $xr22, $xr2, $xr25 +# CHECK-ENCODING: encoding: [0x56,0x64,0x6a,0x74] + +xvavgr.hu $xr25, $xr10, $xr21 +# CHECK-INST: xvavgr.hu $xr25, $xr10, $xr21 +# CHECK-ENCODING: encoding: [0x59,0xd5,0x6a,0x74] + +xvavgr.wu $xr17, $xr14, $xr3 +# CHECK-INST: xvavgr.wu $xr17, $xr14, $xr3 +# CHECK-ENCODING: encoding: [0xd1,0x0d,0x6b,0x74] + +xvavgr.du $xr2, $xr11, $xr13 +# CHECK-INST: xvavgr.du $xr2, $xr11, $xr13 +# CHECK-ENCODING: encoding: [0x62,0xb5,0x6b,0x74] diff --git a/llvm/test/MC/LoongArch/lasx/bitclr.s b/llvm/test/MC/LoongArch/lasx/bitclr.s new file mode 100644 index 0000000000000000000000000000000000000000..5cb47cdf3616cf839e18071894c6b84c5ce9ed77 --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/bitclr.s @@ -0,0 +1,36 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvbitclr.b $xr24, $xr5, $xr14 +# CHECK-INST: xvbitclr.b $xr24, $xr5, $xr14 +# CHECK-ENCODING: encoding: [0xb8,0x38,0x0c,0x75] + +xvbitclr.h $xr30, $xr9, $xr13 +# CHECK-INST: xvbitclr.h $xr30, $xr9, $xr13 +# CHECK-ENCODING: encoding: [0x3e,0xb5,0x0c,0x75] + +xvbitclr.w $xr2, $xr3, $xr7 +# CHECK-INST: xvbitclr.w $xr2, $xr3, $xr7 +# CHECK-ENCODING: encoding: [0x62,0x1c,0x0d,0x75] + +xvbitclr.d $xr14, $xr5, $xr25 +# CHECK-INST: xvbitclr.d $xr14, $xr5, $xr25 +# CHECK-ENCODING: encoding: [0xae,0xe4,0x0d,0x75] + +xvbitclri.b $xr22, $xr26, 7 +# CHECK-INST: xvbitclri.b $xr22, $xr26, 7 +# CHECK-ENCODING: encoding: [0x56,0x3f,0x10,0x77] + +xvbitclri.h $xr2, $xr14, 13 +# CHECK-INST: xvbitclri.h $xr2, $xr14, 13 +# CHECK-ENCODING: encoding: [0xc2,0x75,0x10,0x77] + +xvbitclri.w $xr3, $xr2, 0 +# CHECK-INST: xvbitclri.w $xr3, $xr2, 0 +# CHECK-ENCODING: encoding: [0x43,0x80,0x10,0x77] + +xvbitclri.d $xr10, $xr12, 7 +# CHECK-INST: xvbitclri.d $xr10, $xr12, 7 +# CHECK-ENCODING: encoding: [0x8a,0x1d,0x11,0x77] diff --git a/llvm/test/MC/LoongArch/lasx/bitrev.s b/llvm/test/MC/LoongArch/lasx/bitrev.s new file mode 100644 index 0000000000000000000000000000000000000000..aab31cd9dc5d3d3c5ed52d9a47d660926af6f26b --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/bitrev.s @@ -0,0 +1,36 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvbitrev.b $xr16, $xr20, $xr3 +# CHECK-INST: xvbitrev.b $xr16, $xr20, $xr3 +# CHECK-ENCODING: encoding: [0x90,0x0e,0x10,0x75] + +xvbitrev.h $xr16, $xr3, $xr20 +# CHECK-INST: xvbitrev.h $xr16, $xr3, $xr20 +# CHECK-ENCODING: encoding: [0x70,0xd0,0x10,0x75] + +xvbitrev.w $xr24, $xr26, $xr23 +# CHECK-INST: xvbitrev.w $xr24, $xr26, $xr23 +# CHECK-ENCODING: encoding: [0x58,0x5f,0x11,0x75] + +xvbitrev.d $xr13, $xr1, $xr27 +# CHECK-INST: xvbitrev.d $xr13, $xr1, $xr27 +# CHECK-ENCODING: encoding: [0x2d,0xec,0x11,0x75] + +xvbitrevi.b $xr7, $xr11, 5 +# CHECK-INST: xvbitrevi.b $xr7, $xr11, 5 +# CHECK-ENCODING: encoding: [0x67,0x35,0x18,0x77] + +xvbitrevi.h $xr1, $xr5, 15 +# CHECK-INST: xvbitrevi.h $xr1, $xr5, 15 +# CHECK-ENCODING: encoding: [0xa1,0x7c,0x18,0x77] + +xvbitrevi.w $xr13, $xr21, 18 +# CHECK-INST: xvbitrevi.w $xr13, $xr21, 18 +# CHECK-ENCODING: encoding: [0xad,0xca,0x18,0x77] + +xvbitrevi.d $xr1, $xr3, 9 +# CHECK-INST: xvbitrevi.d $xr1, $xr3, 9 +# CHECK-ENCODING: encoding: [0x61,0x24,0x19,0x77] diff 
--git a/llvm/test/MC/LoongArch/lasx/bitsel.s b/llvm/test/MC/LoongArch/lasx/bitsel.s new file mode 100644 index 0000000000000000000000000000000000000000..e61a4c277f6b3c7273587e8c479bbbac9cb54121 --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/bitsel.s @@ -0,0 +1,8 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvbitsel.v $xr18, $xr29, $xr15, $xr21 +# CHECK-INST: xvbitsel.v $xr18, $xr29, $xr15, $xr21 +# CHECK-ENCODING: encoding: [0xb2,0xbf,0x2a,0x0d] diff --git a/llvm/test/MC/LoongArch/lasx/bitseli.s b/llvm/test/MC/LoongArch/lasx/bitseli.s new file mode 100644 index 0000000000000000000000000000000000000000..18212e9966de0c51e25f0f56f49f473bcbb6c034 --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/bitseli.s @@ -0,0 +1,8 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvbitseli.b $xr13, $xr21, 121 +# CHECK-INST: xvbitseli.b $xr13, $xr21, 121 +# CHECK-ENCODING: encoding: [0xad,0xe6,0xc5,0x77] diff --git a/llvm/test/MC/LoongArch/lasx/bitset.s b/llvm/test/MC/LoongArch/lasx/bitset.s new file mode 100644 index 0000000000000000000000000000000000000000..65eaef758520c0fa3d8ce1981628ec08f5c7d3ec --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/bitset.s @@ -0,0 +1,36 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvbitset.b $xr6, $xr16, $xr28 +# CHECK-INST: xvbitset.b $xr6, $xr16, $xr28 +# CHECK-ENCODING: encoding: [0x06,0x72,0x0e,0x75] + +xvbitset.h $xr5, $xr13, $xr31 +# CHECK-INST: xvbitset.h $xr5, $xr13, $xr31 +# CHECK-ENCODING: encoding: [0xa5,0xfd,0x0e,0x75] + +xvbitset.w $xr7, $xr28, $xr8 +# CHECK-INST: xvbitset.w $xr7, $xr28, $xr8 +# CHECK-ENCODING: encoding: [0x87,0x23,0x0f,0x75] + +xvbitset.d $xr4, $xr16, $xr12 +# CHECK-INST: xvbitset.d $xr4, $xr16, $xr12 +# CHECK-ENCODING: encoding: [0x04,0xb2,0x0f,0x75] + +xvbitseti.b $xr26, $xr3, 0 +# CHECK-INST: xvbitseti.b $xr26, $xr3, 0 +# CHECK-ENCODING: encoding: [0x7a,0x20,0x14,0x77] + +xvbitseti.h $xr9, $xr19, 9 +# CHECK-INST: xvbitseti.h $xr9, $xr19, 9 +# CHECK-ENCODING: encoding: [0x69,0x66,0x14,0x77] + +xvbitseti.w $xr12, $xr19, 2 +# CHECK-INST: xvbitseti.w $xr12, $xr19, 2 +# CHECK-ENCODING: encoding: [0x6c,0x8a,0x14,0x77] + +xvbitseti.d $xr20, $xr7, 2 +# CHECK-INST: xvbitseti.d $xr20, $xr7, 2 +# CHECK-ENCODING: encoding: [0xf4,0x08,0x15,0x77] diff --git a/llvm/test/MC/LoongArch/lasx/bsll.s b/llvm/test/MC/LoongArch/lasx/bsll.s new file mode 100644 index 0000000000000000000000000000000000000000..3eb829748528ea5dea164c455642139e7853894d --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/bsll.s @@ -0,0 +1,8 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvbsll.v $xr14, $xr21, 20 +# CHECK-INST: xvbsll.v $xr14, $xr21, 20 +# CHECK-ENCODING: encoding: [0xae,0x52,0x8e,0x76] diff --git a/llvm/test/MC/LoongArch/lasx/bsrl.s 
b/llvm/test/MC/LoongArch/lasx/bsrl.s new file mode 100644 index 0000000000000000000000000000000000000000..5481ca24f12c5f553cbcd50a8d3af5d8a27e5803 --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/bsrl.s @@ -0,0 +1,8 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvbsrl.v $xr4, $xr5, 29 +# CHECK-INST: xvbsrl.v $xr4, $xr5, 29 +# CHECK-ENCODING: encoding: [0xa4,0xf4,0x8e,0x76] diff --git a/llvm/test/MC/LoongArch/lasx/clo.s b/llvm/test/MC/LoongArch/lasx/clo.s new file mode 100644 index 0000000000000000000000000000000000000000..5b2b093de3448b47f7d175de0d7ea1a7431f7e18 --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/clo.s @@ -0,0 +1,20 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvclo.b $xr9, $xr12 +# CHECK-INST: xvclo.b $xr9, $xr12 +# CHECK-ENCODING: encoding: [0x89,0x01,0x9c,0x76] + +xvclo.h $xr16, $xr14 +# CHECK-INST: xvclo.h $xr16, $xr14 +# CHECK-ENCODING: encoding: [0xd0,0x05,0x9c,0x76] + +xvclo.w $xr30, $xr18 +# CHECK-INST: xvclo.w $xr30, $xr18 +# CHECK-ENCODING: encoding: [0x5e,0x0a,0x9c,0x76] + +xvclo.d $xr31, $xr5 +# CHECK-INST: xvclo.d $xr31, $xr5 +# CHECK-ENCODING: encoding: [0xbf,0x0c,0x9c,0x76] diff --git a/llvm/test/MC/LoongArch/lasx/clz.s b/llvm/test/MC/LoongArch/lasx/clz.s new file mode 100644 index 0000000000000000000000000000000000000000..b61b9193a16ba231c92fc83129694107a6ef924f --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/clz.s @@ -0,0 +1,20 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvclz.b $xr5, $xr6 +# CHECK-INST: xvclz.b $xr5, $xr6 +# CHECK-ENCODING: encoding: [0xc5,0x10,0x9c,0x76] + +xvclz.h $xr4, $xr7 +# CHECK-INST: xvclz.h $xr4, $xr7 +# CHECK-ENCODING: encoding: [0xe4,0x14,0x9c,0x76] + +xvclz.w $xr12, $xr0 +# CHECK-INST: xvclz.w $xr12, $xr0 +# CHECK-ENCODING: encoding: [0x0c,0x18,0x9c,0x76] + +xvclz.d $xr1, $xr0 +# CHECK-INST: xvclz.d $xr1, $xr0 +# CHECK-ENCODING: encoding: [0x01,0x1c,0x9c,0x76] diff --git a/llvm/test/MC/LoongArch/lasx/div.s b/llvm/test/MC/LoongArch/lasx/div.s new file mode 100644 index 0000000000000000000000000000000000000000..c2d721d81597431c873eeda0fcb0077013646c65 --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/div.s @@ -0,0 +1,36 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvdiv.b $xr9, $xr25, $xr8 +# CHECK-INST: xvdiv.b $xr9, $xr25, $xr8 +# CHECK-ENCODING: encoding: [0x29,0x23,0xe0,0x74] + +xvdiv.h $xr18, $xr1, $xr27 +# CHECK-INST: xvdiv.h $xr18, $xr1, $xr27 +# CHECK-ENCODING: encoding: [0x32,0xec,0xe0,0x74] + +xvdiv.w $xr5, $xr26, $xr27 +# CHECK-INST: xvdiv.w $xr5, $xr26, $xr27 +# CHECK-ENCODING: encoding: [0x45,0x6f,0xe1,0x74] + +xvdiv.d $xr27, $xr26, $xr12 +# CHECK-INST: xvdiv.d $xr27, $xr26, $xr12 +# CHECK-ENCODING: encoding: [0x5b,0xb3,0xe1,0x74] + +xvdiv.bu $xr0, $xr22, $xr30 +# CHECK-INST: xvdiv.bu $xr0, 
$xr22, $xr30 +# CHECK-ENCODING: encoding: [0xc0,0x7a,0xe4,0x74] + +xvdiv.hu $xr31, $xr23, $xr25 +# CHECK-INST: xvdiv.hu $xr31, $xr23, $xr25 +# CHECK-ENCODING: encoding: [0xff,0xe6,0xe4,0x74] + +xvdiv.wu $xr1, $xr25, $xr7 +# CHECK-INST: xvdiv.wu $xr1, $xr25, $xr7 +# CHECK-ENCODING: encoding: [0x21,0x1f,0xe5,0x74] + +xvdiv.du $xr7, $xr25, $xr7 +# CHECK-INST: xvdiv.du $xr7, $xr25, $xr7 +# CHECK-ENCODING: encoding: [0x27,0x9f,0xe5,0x74] diff --git a/llvm/test/MC/LoongArch/lasx/ext2xv.s b/llvm/test/MC/LoongArch/lasx/ext2xv.s new file mode 100644 index 0000000000000000000000000000000000000000..98310657d6dda1e1d81b987bf5d4d357a5fadf4a --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/ext2xv.s @@ -0,0 +1,52 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vext2xv.h.b $xr30, $xr19 +# CHECK-INST: vext2xv.h.b $xr30, $xr19 +# CHECK-ENCODING: encoding: [0x7e,0x12,0x9f,0x76] + +vext2xv.w.b $xr27, $xr5 +# CHECK-INST: vext2xv.w.b $xr27, $xr5 +# CHECK-ENCODING: encoding: [0xbb,0x14,0x9f,0x76] + +vext2xv.d.b $xr25, $xr25 +# CHECK-INST: vext2xv.d.b $xr25, $xr25 +# CHECK-ENCODING: encoding: [0x39,0x1b,0x9f,0x76] + +vext2xv.w.h $xr20, $xr20 +# CHECK-INST: vext2xv.w.h $xr20, $xr20 +# CHECK-ENCODING: encoding: [0x94,0x1e,0x9f,0x76] + +vext2xv.d.h $xr8, $xr19 +# CHECK-INST: vext2xv.d.h $xr8, $xr19 +# CHECK-ENCODING: encoding: [0x68,0x22,0x9f,0x76] + +vext2xv.d.w $xr4, $xr25 +# CHECK-INST: vext2xv.d.w $xr4, $xr25 +# CHECK-ENCODING: encoding: [0x24,0x27,0x9f,0x76] + +vext2xv.hu.bu $xr25, $xr12 +# CHECK-INST: vext2xv.hu.bu $xr25, $xr12 +# CHECK-ENCODING: encoding: [0x99,0x29,0x9f,0x76] + +vext2xv.wu.bu $xr31, $xr13 +# CHECK-INST: vext2xv.wu.bu $xr31, $xr13 +# CHECK-ENCODING: encoding: [0xbf,0x2d,0x9f,0x76] + +vext2xv.du.bu $xr12, $xr25 +# CHECK-INST: vext2xv.du.bu $xr12, $xr25 +# CHECK-ENCODING: encoding: [0x2c,0x33,0x9f,0x76] + +vext2xv.wu.hu $xr23, $xr12 +# CHECK-INST: vext2xv.wu.hu $xr23, $xr12 +# CHECK-ENCODING: encoding: [0x97,0x35,0x9f,0x76] + +vext2xv.du.hu $xr18, $xr6 +# CHECK-INST: vext2xv.du.hu $xr18, $xr6 +# CHECK-ENCODING: encoding: [0xd2,0x38,0x9f,0x76] + +vext2xv.du.wu $xr10, $xr21 +# CHECK-INST: vext2xv.du.wu $xr10, $xr21 +# CHECK-ENCODING: encoding: [0xaa,0x3e,0x9f,0x76] diff --git a/llvm/test/MC/LoongArch/lasx/exth.s b/llvm/test/MC/LoongArch/lasx/exth.s new file mode 100644 index 0000000000000000000000000000000000000000..1ce1e58e358a1f1813945eba21d329a5a7f6a064 --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/exth.s @@ -0,0 +1,36 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvexth.h.b $xr15, $xr10 +# CHECK-INST: xvexth.h.b $xr15, $xr10 +# CHECK-ENCODING: encoding: [0x4f,0xe1,0x9e,0x76] + +xvexth.w.h $xr26, $xr11 +# CHECK-INST: xvexth.w.h $xr26, $xr11 +# CHECK-ENCODING: encoding: [0x7a,0xe5,0x9e,0x76] + +xvexth.d.w $xr2, $xr27 +# CHECK-INST: xvexth.d.w $xr2, $xr27 +# CHECK-ENCODING: encoding: [0x62,0xeb,0x9e,0x76] + +xvexth.q.d $xr22, $xr25 +# CHECK-INST: xvexth.q.d $xr22, $xr25 +# CHECK-ENCODING: encoding: [0x36,0xef,0x9e,0x76] + +xvexth.hu.bu $xr21, $xr30 +# CHECK-INST: xvexth.hu.bu $xr21, $xr30 +# CHECK-ENCODING: encoding: [0xd5,0xf3,0x9e,0x76] + +xvexth.wu.hu $xr28, $xr11 +# CHECK-INST: 
xvexth.wu.hu $xr28, $xr11 +# CHECK-ENCODING: encoding: [0x7c,0xf5,0x9e,0x76] + +xvexth.du.wu $xr27, $xr25 +# CHECK-INST: xvexth.du.wu $xr27, $xr25 +# CHECK-ENCODING: encoding: [0x3b,0xfb,0x9e,0x76] + +xvexth.qu.du $xr16, $xr28 +# CHECK-INST: xvexth.qu.du $xr16, $xr28 +# CHECK-ENCODING: encoding: [0x90,0xff,0x9e,0x76] diff --git a/llvm/test/MC/LoongArch/lasx/extl.s b/llvm/test/MC/LoongArch/lasx/extl.s new file mode 100644 index 0000000000000000000000000000000000000000..d6644e00c6d2621388c5cb417f7094c6c86f71d2 --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/extl.s @@ -0,0 +1,12 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvextl.q.d $xr29, $xr12 +# CHECK-INST: xvextl.q.d $xr29, $xr12 +# CHECK-ENCODING: encoding: [0x9d,0x01,0x09,0x77] + +xvextl.qu.du $xr27, $xr20 +# CHECK-INST: xvextl.qu.du $xr27, $xr20 +# CHECK-ENCODING: encoding: [0x9b,0x02,0x0d,0x77] diff --git a/llvm/test/MC/LoongArch/lasx/extrins.s b/llvm/test/MC/LoongArch/lasx/extrins.s new file mode 100644 index 0000000000000000000000000000000000000000..855571049bd957af347d9f70e522d11bf6bc880f --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/extrins.s @@ -0,0 +1,20 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvextrins.b $xr30, $xr23, 252 +# CHECK-INST: xvextrins.b $xr30, $xr23, 252 +# CHECK-ENCODING: encoding: [0xfe,0xf2,0x8f,0x77] + +xvextrins.h $xr0, $xr13, 200 +# CHECK-INST: xvextrins.h $xr0, $xr13, 200 +# CHECK-ENCODING: encoding: [0xa0,0x21,0x8b,0x77] + +xvextrins.w $xr14, $xr21, 152 +# CHECK-INST: xvextrins.w $xr14, $xr21, 152 +# CHECK-ENCODING: encoding: [0xae,0x62,0x86,0x77] + +xvextrins.d $xr31, $xr30, 135 +# CHECK-INST: xvextrins.d $xr31, $xr30, 135 +# CHECK-ENCODING: encoding: [0xdf,0x1f,0x82,0x77] diff --git a/llvm/test/MC/LoongArch/lasx/fadd.s b/llvm/test/MC/LoongArch/lasx/fadd.s new file mode 100644 index 0000000000000000000000000000000000000000..e56beb91fe1f094729d3e0b3afeb247b64538071 --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/fadd.s @@ -0,0 +1,12 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvfadd.s $xr6, $xr21, $xr15 +# CHECK-INST: xvfadd.s $xr6, $xr21, $xr15 +# CHECK-ENCODING: encoding: [0xa6,0xbe,0x30,0x75] + +xvfadd.d $xr27, $xr8, $xr1 +# CHECK-INST: xvfadd.d $xr27, $xr8, $xr1 +# CHECK-ENCODING: encoding: [0x1b,0x05,0x31,0x75] diff --git a/llvm/test/MC/LoongArch/lasx/fclass.s b/llvm/test/MC/LoongArch/lasx/fclass.s new file mode 100644 index 0000000000000000000000000000000000000000..424f77be6f97857919d6739886e04dd2ee7108d6 --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/fclass.s @@ -0,0 +1,12 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvfclass.s $xr3, $xr7 +# CHECK-INST: xvfclass.s $xr3, $xr7 +# CHECK-ENCODING: encoding: [0xe3,0xd4,0x9c,0x76] + +xvfclass.d $xr22, $xr10 +# 
CHECK-INST: xvfclass.d $xr22, $xr10 +# CHECK-ENCODING: encoding: [0x56,0xd9,0x9c,0x76] diff --git a/llvm/test/MC/LoongArch/lasx/fcmp.s b/llvm/test/MC/LoongArch/lasx/fcmp.s new file mode 100644 index 0000000000000000000000000000000000000000..71759e7f3dcdcf9d7e9ec7ca952a774b9ef3860f --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/fcmp.s @@ -0,0 +1,180 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvfcmp.caf.s $xr1, $xr8, $xr31 +# CHECK-INST: xvfcmp.caf.s $xr1, $xr8, $xr31 +# CHECK-ENCODING: encoding: [0x01,0x7d,0x90,0x0c] + +xvfcmp.caf.d $xr19, $xr31, $xr20 +# CHECK-INST: xvfcmp.caf.d $xr19, $xr31, $xr20 +# CHECK-ENCODING: encoding: [0xf3,0x53,0xa0,0x0c] + +xvfcmp.cun.s $xr8, $xr9, $xr29 +# CHECK-INST: xvfcmp.cun.s $xr8, $xr9, $xr29 +# CHECK-ENCODING: encoding: [0x28,0x75,0x94,0x0c] + +xvfcmp.cun.d $xr19, $xr22, $xr28 +# CHECK-INST: xvfcmp.cun.d $xr19, $xr22, $xr28 +# CHECK-ENCODING: encoding: [0xd3,0x72,0xa4,0x0c] + +xvfcmp.ceq.s $xr0, $xr1, $xr0 +# CHECK-INST: xvfcmp.ceq.s $xr0, $xr1, $xr0 +# CHECK-ENCODING: encoding: [0x20,0x00,0x92,0x0c] + +xvfcmp.ceq.d $xr29, $xr23, $xr20 +# CHECK-INST: xvfcmp.ceq.d $xr29, $xr23, $xr20 +# CHECK-ENCODING: encoding: [0xfd,0x52,0xa2,0x0c] + +xvfcmp.cueq.s $xr5, $xr13, $xr31 +# CHECK-INST: xvfcmp.cueq.s $xr5, $xr13, $xr31 +# CHECK-ENCODING: encoding: [0xa5,0x7d,0x96,0x0c] + +xvfcmp.cueq.d $xr4, $xr22, $xr7 +# CHECK-INST: xvfcmp.cueq.d $xr4, $xr22, $xr7 +# CHECK-ENCODING: encoding: [0xc4,0x1e,0xa6,0x0c] + +xvfcmp.clt.s $xr4, $xr9, $xr1 +# CHECK-INST: xvfcmp.clt.s $xr4, $xr9, $xr1 +# CHECK-ENCODING: encoding: [0x24,0x05,0x91,0x0c] + +xvfcmp.clt.d $xr19, $xr4, $xr21 +# CHECK-INST: xvfcmp.clt.d $xr19, $xr4, $xr21 +# CHECK-ENCODING: encoding: [0x93,0x54,0xa1,0x0c] + +xvfcmp.cult.s $xr15, $xr17, $xr3 +# CHECK-INST: xvfcmp.cult.s $xr15, $xr17, $xr3 +# CHECK-ENCODING: encoding: [0x2f,0x0e,0x95,0x0c] + +xvfcmp.cult.d $xr20, $xr17, $xr6 +# CHECK-INST: xvfcmp.cult.d $xr20, $xr17, $xr6 +# CHECK-ENCODING: encoding: [0x34,0x1a,0xa5,0x0c] + +xvfcmp.cle.s $xr22, $xr22, $xr15 +# CHECK-INST: xvfcmp.cle.s $xr22, $xr22, $xr15 +# CHECK-ENCODING: encoding: [0xd6,0x3e,0x93,0x0c] + +xvfcmp.cle.d $xr21, $xr25, $xr12 +# CHECK-INST: xvfcmp.cle.d $xr21, $xr25, $xr12 +# CHECK-ENCODING: encoding: [0x35,0x33,0xa3,0x0c] + +xvfcmp.cule.s $xr1, $xr2, $xr29 +# CHECK-INST: xvfcmp.cule.s $xr1, $xr2, $xr29 +# CHECK-ENCODING: encoding: [0x41,0x74,0x97,0x0c] + +xvfcmp.cule.d $xr0, $xr5, $xr11 +# CHECK-INST: xvfcmp.cule.d $xr0, $xr5, $xr11 +# CHECK-ENCODING: encoding: [0xa0,0x2c,0xa7,0x0c] + +xvfcmp.cne.s $xr7, $xr17, $xr26 +# CHECK-INST: xvfcmp.cne.s $xr7, $xr17, $xr26 +# CHECK-ENCODING: encoding: [0x27,0x6a,0x98,0x0c] + +xvfcmp.cne.d $xr18, $xr25, $xr0 +# CHECK-INST: xvfcmp.cne.d $xr18, $xr25, $xr0 +# CHECK-ENCODING: encoding: [0x32,0x03,0xa8,0x0c] + +xvfcmp.cor.s $xr1, $xr2, $xr14 +# CHECK-INST: xvfcmp.cor.s $xr1, $xr2, $xr14 +# CHECK-ENCODING: encoding: [0x41,0x38,0x9a,0x0c] + +xvfcmp.cor.d $xr12, $xr19, $xr23 +# CHECK-INST: xvfcmp.cor.d $xr12, $xr19, $xr23 +# CHECK-ENCODING: encoding: [0x6c,0x5e,0xaa,0x0c] + +xvfcmp.cune.s $xr21, $xr17, $xr4 +# CHECK-INST: xvfcmp.cune.s $xr21, $xr17, $xr4 +# CHECK-ENCODING: encoding: [0x35,0x12,0x9c,0x0c] + +xvfcmp.cune.d $xr20, $xr30, $xr12 +# CHECK-INST: xvfcmp.cune.d $xr20, $xr30, $xr12 +# CHECK-ENCODING: encoding: [0xd4,0x33,0xac,0x0c] + 
+xvfcmp.saf.s $xr23, $xr11, $xr2 +# CHECK-INST: xvfcmp.saf.s $xr23, $xr11, $xr2 +# CHECK-ENCODING: encoding: [0x77,0x89,0x90,0x0c] + +xvfcmp.saf.d $xr7, $xr12, $xr7 +# CHECK-INST: xvfcmp.saf.d $xr7, $xr12, $xr7 +# CHECK-ENCODING: encoding: [0x87,0x9d,0xa0,0x0c] + +xvfcmp.sun.s $xr0, $xr7, $xr30 +# CHECK-INST: xvfcmp.sun.s $xr0, $xr7, $xr30 +# CHECK-ENCODING: encoding: [0xe0,0xf8,0x94,0x0c] + +xvfcmp.sun.d $xr4, $xr11, $xr30 +# CHECK-INST: xvfcmp.sun.d $xr4, $xr11, $xr30 +# CHECK-ENCODING: encoding: [0x64,0xf9,0xa4,0x0c] + +xvfcmp.seq.s $xr15, $xr23, $xr27 +# CHECK-INST: xvfcmp.seq.s $xr15, $xr23, $xr27 +# CHECK-ENCODING: encoding: [0xef,0xee,0x92,0x0c] + +xvfcmp.seq.d $xr15, $xr22, $xr3 +# CHECK-INST: xvfcmp.seq.d $xr15, $xr22, $xr3 +# CHECK-ENCODING: encoding: [0xcf,0x8e,0xa2,0x0c] + +xvfcmp.sueq.s $xr12, $xr26, $xr9 +# CHECK-INST: xvfcmp.sueq.s $xr12, $xr26, $xr9 +# CHECK-ENCODING: encoding: [0x4c,0xa7,0x96,0x0c] + +xvfcmp.sueq.d $xr5, $xr18, $xr17 +# CHECK-INST: xvfcmp.sueq.d $xr5, $xr18, $xr17 +# CHECK-ENCODING: encoding: [0x45,0xc6,0xa6,0x0c] + +xvfcmp.slt.s $xr25, $xr18, $xr31 +# CHECK-INST: xvfcmp.slt.s $xr25, $xr18, $xr31 +# CHECK-ENCODING: encoding: [0x59,0xfe,0x91,0x0c] + +xvfcmp.slt.d $xr17, $xr26, $xr24 +# CHECK-INST: xvfcmp.slt.d $xr17, $xr26, $xr24 +# CHECK-ENCODING: encoding: [0x51,0xe3,0xa1,0x0c] + +xvfcmp.sult.s $xr8, $xr15, $xr18 +# CHECK-INST: xvfcmp.sult.s $xr8, $xr15, $xr18 +# CHECK-ENCODING: encoding: [0xe8,0xc9,0x95,0x0c] + +xvfcmp.sult.d $xr4, $xr4, $xr5 +# CHECK-INST: xvfcmp.sult.d $xr4, $xr4, $xr5 +# CHECK-ENCODING: encoding: [0x84,0x94,0xa5,0x0c] + +xvfcmp.sle.s $xr1, $xr5, $xr16 +# CHECK-INST: xvfcmp.sle.s $xr1, $xr5, $xr16 +# CHECK-ENCODING: encoding: [0xa1,0xc0,0x93,0x0c] + +xvfcmp.sle.d $xr3, $xr1, $xr23 +# CHECK-INST: xvfcmp.sle.d $xr3, $xr1, $xr23 +# CHECK-ENCODING: encoding: [0x23,0xdc,0xa3,0x0c] + +xvfcmp.sule.s $xr23, $xr11, $xr1 +# CHECK-INST: xvfcmp.sule.s $xr23, $xr11, $xr1 +# CHECK-ENCODING: encoding: [0x77,0x85,0x97,0x0c] + +xvfcmp.sule.d $xr11, $xr10, $xr17 +# CHECK-INST: xvfcmp.sule.d $xr11, $xr10, $xr17 +# CHECK-ENCODING: encoding: [0x4b,0xc5,0xa7,0x0c] + +xvfcmp.sne.s $xr27, $xr12, $xr30 +# CHECK-INST: xvfcmp.sne.s $xr27, $xr12, $xr30 +# CHECK-ENCODING: encoding: [0x9b,0xf9,0x98,0x0c] + +xvfcmp.sne.d $xr20, $xr20, $xr17 +# CHECK-INST: xvfcmp.sne.d $xr20, $xr20, $xr17 +# CHECK-ENCODING: encoding: [0x94,0xc6,0xa8,0x0c] + +xvfcmp.sor.s $xr11, $xr13, $xr2 +# CHECK-INST: xvfcmp.sor.s $xr11, $xr13, $xr2 +# CHECK-ENCODING: encoding: [0xab,0x89,0x9a,0x0c] + +xvfcmp.sor.d $xr6, $xr28, $xr6 +# CHECK-INST: xvfcmp.sor.d $xr6, $xr28, $xr6 +# CHECK-ENCODING: encoding: [0x86,0x9b,0xaa,0x0c] + +xvfcmp.sune.s $xr11, $xr16, $xr8 +# CHECK-INST: xvfcmp.sune.s $xr11, $xr16, $xr8 +# CHECK-ENCODING: encoding: [0x0b,0xa2,0x9c,0x0c] + +xvfcmp.sune.d $xr30, $xr5, $xr27 +# CHECK-INST: xvfcmp.sune.d $xr30, $xr5, $xr27 +# CHECK-ENCODING: encoding: [0xbe,0xec,0xac,0x0c] diff --git a/llvm/test/MC/LoongArch/lasx/fcvt.s b/llvm/test/MC/LoongArch/lasx/fcvt.s new file mode 100644 index 0000000000000000000000000000000000000000..da919bd3797a13327b80c91a6113eb2589e3eea6 --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/fcvt.s @@ -0,0 +1,12 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvfcvt.h.s $xr9, $xr17, $xr23 +# CHECK-INST: xvfcvt.h.s $xr9, $xr17, $xr23 +# CHECK-ENCODING: 
encoding: [0x29,0x5e,0x46,0x75] + +xvfcvt.s.d $xr27, $xr10, $xr29 +# CHECK-INST: xvfcvt.s.d $xr27, $xr10, $xr29 +# CHECK-ENCODING: encoding: [0x5b,0xf5,0x46,0x75] diff --git a/llvm/test/MC/LoongArch/lasx/fcvth.s b/llvm/test/MC/LoongArch/lasx/fcvth.s new file mode 100644 index 0000000000000000000000000000000000000000..099d5dd4604def3a3e9fcc9ab06a288561b2e26f --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/fcvth.s @@ -0,0 +1,12 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvfcvth.s.h $xr9, $xr25 +# CHECK-INST: xvfcvth.s.h $xr9, $xr25 +# CHECK-ENCODING: encoding: [0x29,0xef,0x9d,0x76] + +xvfcvth.d.s $xr29, $xr17 +# CHECK-INST: xvfcvth.d.s $xr29, $xr17 +# CHECK-ENCODING: encoding: [0x3d,0xf6,0x9d,0x76] diff --git a/llvm/test/MC/LoongArch/lasx/fcvtl.s b/llvm/test/MC/LoongArch/lasx/fcvtl.s new file mode 100644 index 0000000000000000000000000000000000000000..2c9941cc64cc9f35e976b8287f8aaef63c4c31f1 --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/fcvtl.s @@ -0,0 +1,12 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvfcvtl.s.h $xr16, $xr14 +# CHECK-INST: xvfcvtl.s.h $xr16, $xr14 +# CHECK-ENCODING: encoding: [0xd0,0xe9,0x9d,0x76] + +xvfcvtl.d.s $xr24, $xr5 +# CHECK-INST: xvfcvtl.d.s $xr24, $xr5 +# CHECK-ENCODING: encoding: [0xb8,0xf0,0x9d,0x76] diff --git a/llvm/test/MC/LoongArch/lasx/fdiv.s b/llvm/test/MC/LoongArch/lasx/fdiv.s new file mode 100644 index 0000000000000000000000000000000000000000..133690f94231ad4c98d19a091d0e5386235880fc --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/fdiv.s @@ -0,0 +1,12 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvfdiv.s $xr29, $xr5, $xr12 +# CHECK-INST: xvfdiv.s $xr29, $xr5, $xr12 +# CHECK-ENCODING: encoding: [0xbd,0xb0,0x3a,0x75] + +xvfdiv.d $xr31, $xr10, $xr30 +# CHECK-INST: xvfdiv.d $xr31, $xr10, $xr30 +# CHECK-ENCODING: encoding: [0x5f,0x79,0x3b,0x75] diff --git a/llvm/test/MC/LoongArch/lasx/ffint.s b/llvm/test/MC/LoongArch/lasx/ffint.s new file mode 100644 index 0000000000000000000000000000000000000000..7cd663a4358f9ce9fa43baebeff7116c01fe114b --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/ffint.s @@ -0,0 +1,32 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvffint.s.w $xr3, $xr5 +# CHECK-INST: xvffint.s.w $xr3, $xr5 +# CHECK-ENCODING: encoding: [0xa3,0x00,0x9e,0x76] + +xvffint.d.l $xr5, $xr19 +# CHECK-INST: xvffint.d.l $xr5, $xr19 +# CHECK-ENCODING: encoding: [0x65,0x0a,0x9e,0x76] + +xvffint.s.wu $xr3, $xr28 +# CHECK-INST: xvffint.s.wu $xr3, $xr28 +# CHECK-ENCODING: encoding: [0x83,0x07,0x9e,0x76] + +xvffint.d.lu $xr31, $xr29 +# CHECK-INST: xvffint.d.lu $xr31, $xr29 +# CHECK-ENCODING: encoding: [0xbf,0x0f,0x9e,0x76] + +xvffintl.d.w $xr2, $xr7 +# CHECK-INST: xvffintl.d.w $xr2, $xr7 +# CHECK-ENCODING: encoding: [0xe2,0x10,0x9e,0x76] + 
+xvffinth.d.w $xr7, $xr28 +# CHECK-INST: xvffinth.d.w $xr7, $xr28 +# CHECK-ENCODING: encoding: [0x87,0x17,0x9e,0x76] + +xvffint.s.l $xr10, $xr27, $xr3 +# CHECK-INST: xvffint.s.l $xr10, $xr27, $xr3 +# CHECK-ENCODING: encoding: [0x6a,0x0f,0x48,0x75] diff --git a/llvm/test/MC/LoongArch/lasx/flogb.s b/llvm/test/MC/LoongArch/lasx/flogb.s new file mode 100644 index 0000000000000000000000000000000000000000..cccf618566c3c1f310261ae71664c03d85d34fac --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/flogb.s @@ -0,0 +1,12 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvflogb.s $xr17, $xr12 +# CHECK-INST: xvflogb.s $xr17, $xr12 +# CHECK-ENCODING: encoding: [0x91,0xc5,0x9c,0x76] + +xvflogb.d $xr26, $xr1 +# CHECK-INST: xvflogb.d $xr26, $xr1 +# CHECK-ENCODING: encoding: [0x3a,0xc8,0x9c,0x76] diff --git a/llvm/test/MC/LoongArch/lasx/fmadd.s b/llvm/test/MC/LoongArch/lasx/fmadd.s new file mode 100644 index 0000000000000000000000000000000000000000..c4c3305012809ac25521f2bae04db35d27926ddf --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/fmadd.s @@ -0,0 +1,12 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvfmadd.s $xr5, $xr31, $xr31, $xr27 +# CHECK-INST: xvfmadd.s $xr5, $xr31, $xr31, $xr27 +# CHECK-ENCODING: encoding: [0xe5,0xff,0x1d,0x0a] + +xvfmadd.d $xr9, $xr16, $xr31, $xr25 +# CHECK-INST: xvfmadd.d $xr9, $xr16, $xr31, $xr25 +# CHECK-ENCODING: encoding: [0x09,0xfe,0x2c,0x0a] diff --git a/llvm/test/MC/LoongArch/lasx/fmax.s b/llvm/test/MC/LoongArch/lasx/fmax.s new file mode 100644 index 0000000000000000000000000000000000000000..a5f4f901808d1b8a5f88865fff9247eb6a3cbcb2 --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/fmax.s @@ -0,0 +1,12 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvfmax.s $xr29, $xr24, $xr8 +# CHECK-INST: xvfmax.s $xr29, $xr24, $xr8 +# CHECK-ENCODING: encoding: [0x1d,0xa3,0x3c,0x75] + +xvfmax.d $xr31, $xr25, $xr23 +# CHECK-INST: xvfmax.d $xr31, $xr25, $xr23 +# CHECK-ENCODING: encoding: [0x3f,0x5f,0x3d,0x75] diff --git a/llvm/test/MC/LoongArch/lasx/fmaxa.s b/llvm/test/MC/LoongArch/lasx/fmaxa.s new file mode 100644 index 0000000000000000000000000000000000000000..1181264e418c7b0e96f8564659320e880d00b587 --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/fmaxa.s @@ -0,0 +1,12 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvfmaxa.s $xr15, $xr18, $xr5 +# CHECK-INST: xvfmaxa.s $xr15, $xr18, $xr5 +# CHECK-ENCODING: encoding: [0x4f,0x96,0x40,0x75] + +xvfmaxa.d $xr2, $xr20, $xr29 +# CHECK-INST: xvfmaxa.d $xr2, $xr20, $xr29 +# CHECK-ENCODING: encoding: [0x82,0x76,0x41,0x75] diff --git a/llvm/test/MC/LoongArch/lasx/fmin.s b/llvm/test/MC/LoongArch/lasx/fmin.s new file mode 100644 index 0000000000000000000000000000000000000000..735cdd9ce82c4ea848982501dc3385b9d428337d --- /dev/null +++ 
b/llvm/test/MC/LoongArch/lasx/fmin.s @@ -0,0 +1,12 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvfmin.s $xr31, $xr5, $xr16 +# CHECK-INST: xvfmin.s $xr31, $xr5, $xr16 +# CHECK-ENCODING: encoding: [0xbf,0xc0,0x3e,0x75] + +xvfmin.d $xr13, $xr30, $xr25 +# CHECK-INST: xvfmin.d $xr13, $xr30, $xr25 +# CHECK-ENCODING: encoding: [0xcd,0x67,0x3f,0x75] diff --git a/llvm/test/MC/LoongArch/lasx/fmina.s b/llvm/test/MC/LoongArch/lasx/fmina.s new file mode 100644 index 0000000000000000000000000000000000000000..fbfe44c9452622008f423238d0e4301137ff3d9d --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/fmina.s @@ -0,0 +1,12 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvfmina.s $xr29, $xr27, $xr17 +# CHECK-INST: xvfmina.s $xr29, $xr27, $xr17 +# CHECK-ENCODING: encoding: [0x7d,0xc7,0x42,0x75] + +xvfmina.d $xr12, $xr20, $xr18 +# CHECK-INST: xvfmina.d $xr12, $xr20, $xr18 +# CHECK-ENCODING: encoding: [0x8c,0x4a,0x43,0x75] diff --git a/llvm/test/MC/LoongArch/lasx/fmsub.s b/llvm/test/MC/LoongArch/lasx/fmsub.s new file mode 100644 index 0000000000000000000000000000000000000000..8291d2b75bedf3a8b12f0fa4e7e423058a34fea1 --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/fmsub.s @@ -0,0 +1,12 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvfmsub.s $xr17, $xr3, $xr3, $xr23 +# CHECK-INST: xvfmsub.s $xr17, $xr3, $xr3, $xr23 +# CHECK-ENCODING: encoding: [0x71,0x8c,0x5b,0x0a] + +xvfmsub.d $xr30, $xr15, $xr16, $xr14 +# CHECK-INST: xvfmsub.d $xr30, $xr15, $xr16, $xr14 +# CHECK-ENCODING: encoding: [0xfe,0x41,0x67,0x0a] diff --git a/llvm/test/MC/LoongArch/lasx/fmul.s b/llvm/test/MC/LoongArch/lasx/fmul.s new file mode 100644 index 0000000000000000000000000000000000000000..bff5770170ef5522e6743f60c8f468b9925b8a97 --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/fmul.s @@ -0,0 +1,12 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvfmul.s $xr9, $xr14, $xr30 +# CHECK-INST: xvfmul.s $xr9, $xr14, $xr30 +# CHECK-ENCODING: encoding: [0xc9,0xf9,0x38,0x75] + +xvfmul.d $xr28, $xr26, $xr19 +# CHECK-INST: xvfmul.d $xr28, $xr26, $xr19 +# CHECK-ENCODING: encoding: [0x5c,0x4f,0x39,0x75] diff --git a/llvm/test/MC/LoongArch/lasx/fnmadd.s b/llvm/test/MC/LoongArch/lasx/fnmadd.s new file mode 100644 index 0000000000000000000000000000000000000000..04830c97cf46acbd2ef82b92fff6f11249128948 --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/fnmadd.s @@ -0,0 +1,12 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvfnmadd.s $xr14, $xr22, $xr23, $xr24 +# CHECK-INST: xvfnmadd.s $xr14, $xr22, $xr23, $xr24 +# CHECK-ENCODING: encoding: 
[0xce,0x5e,0x9c,0x0a] + +xvfnmadd.d $xr1, $xr30, $xr23, $xr12 +# CHECK-INST: xvfnmadd.d $xr1, $xr30, $xr23, $xr12 +# CHECK-ENCODING: encoding: [0xc1,0x5f,0xa6,0x0a] diff --git a/llvm/test/MC/LoongArch/lasx/fnmsub.s b/llvm/test/MC/LoongArch/lasx/fnmsub.s new file mode 100644 index 0000000000000000000000000000000000000000..6a749eb2248e2a29df698b84484caf409e3624e7 --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/fnmsub.s @@ -0,0 +1,12 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvfnmsub.s $xr22, $xr5, $xr4, $xr11 +# CHECK-INST: xvfnmsub.s $xr22, $xr5, $xr4, $xr11 +# CHECK-ENCODING: encoding: [0xb6,0x90,0xd5,0x0a] + +xvfnmsub.d $xr8, $xr0, $xr29, $xr28 +# CHECK-INST: xvfnmsub.d $xr8, $xr0, $xr29, $xr28 +# CHECK-ENCODING: encoding: [0x08,0x74,0xee,0x0a] diff --git a/llvm/test/MC/LoongArch/lasx/frecip.s b/llvm/test/MC/LoongArch/lasx/frecip.s new file mode 100644 index 0000000000000000000000000000000000000000..1bb3ce02fb9c056f3e0621cb7f418edab2403ab1 --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/frecip.s @@ -0,0 +1,12 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvfrecip.s $xr3, $xr16 +# CHECK-INST: xvfrecip.s $xr3, $xr16 +# CHECK-ENCODING: encoding: [0x03,0xf6,0x9c,0x76] + +xvfrecip.d $xr17, $xr24 +# CHECK-INST: xvfrecip.d $xr17, $xr24 +# CHECK-ENCODING: encoding: [0x11,0xfb,0x9c,0x76] diff --git a/llvm/test/MC/LoongArch/lasx/frint.s b/llvm/test/MC/LoongArch/lasx/frint.s new file mode 100644 index 0000000000000000000000000000000000000000..03ab8684f3f7162c52b213ab93564a02aeddef99 --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/frint.s @@ -0,0 +1,44 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvfrintrne.s $xr19, $xr17 +# CHECK-INST: xvfrintrne.s $xr19, $xr17 +# CHECK-ENCODING: encoding: [0x33,0x76,0x9d,0x76] + +xvfrintrne.d $xr12, $xr29 +# CHECK-INST: xvfrintrne.d $xr12, $xr29 +# CHECK-ENCODING: encoding: [0xac,0x7b,0x9d,0x76] + +xvfrintrz.s $xr10, $xr9 +# CHECK-INST: xvfrintrz.s $xr10, $xr9 +# CHECK-ENCODING: encoding: [0x2a,0x65,0x9d,0x76] + +xvfrintrz.d $xr29, $xr5 +# CHECK-INST: xvfrintrz.d $xr29, $xr5 +# CHECK-ENCODING: encoding: [0xbd,0x68,0x9d,0x76] + +xvfrintrp.s $xr26, $xr16 +# CHECK-INST: xvfrintrp.s $xr26, $xr16 +# CHECK-ENCODING: encoding: [0x1a,0x56,0x9d,0x76] + +xvfrintrp.d $xr1, $xr28 +# CHECK-INST: xvfrintrp.d $xr1, $xr28 +# CHECK-ENCODING: encoding: [0x81,0x5b,0x9d,0x76] + +xvfrintrm.s $xr27, $xr13 +# CHECK-INST: xvfrintrm.s $xr27, $xr13 +# CHECK-ENCODING: encoding: [0xbb,0x45,0x9d,0x76] + +xvfrintrm.d $xr14, $xr27 +# CHECK-INST: xvfrintrm.d $xr14, $xr27 +# CHECK-ENCODING: encoding: [0x6e,0x4b,0x9d,0x76] + +xvfrint.s $xr21, $xr24 +# CHECK-INST: xvfrint.s $xr21, $xr24 +# CHECK-ENCODING: encoding: [0x15,0x37,0x9d,0x76] + +xvfrint.d $xr31, $xr18 +# CHECK-INST: xvfrint.d $xr31, $xr18 +# CHECK-ENCODING: encoding: [0x5f,0x3a,0x9d,0x76] diff --git a/llvm/test/MC/LoongArch/lasx/frsqrt.s b/llvm/test/MC/LoongArch/lasx/frsqrt.s new file mode 100644 index 
0000000000000000000000000000000000000000..af96e10832dfb13206953a78f7deec2f0c3f4db9 --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/frsqrt.s @@ -0,0 +1,12 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvfrsqrt.s $xr31, $xr25 +# CHECK-INST: xvfrsqrt.s $xr31, $xr25 +# CHECK-ENCODING: encoding: [0x3f,0x07,0x9d,0x76] + +xvfrsqrt.d $xr14, $xr22 +# CHECK-INST: xvfrsqrt.d $xr14, $xr22 +# CHECK-ENCODING: encoding: [0xce,0x0a,0x9d,0x76] diff --git a/llvm/test/MC/LoongArch/lasx/frstp.s b/llvm/test/MC/LoongArch/lasx/frstp.s new file mode 100644 index 0000000000000000000000000000000000000000..b76309f4b0f4503bbfd5ec78dc9d48aad1038f16 --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/frstp.s @@ -0,0 +1,20 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvfrstp.b $xr23, $xr18, $xr18 +# CHECK-INST: xvfrstp.b $xr23, $xr18, $xr18 +# CHECK-ENCODING: encoding: [0x57,0x4a,0x2b,0x75] + +xvfrstp.h $xr13, $xr30, $xr6 +# CHECK-INST: xvfrstp.h $xr13, $xr30, $xr6 +# CHECK-ENCODING: encoding: [0xcd,0x9b,0x2b,0x75] + +xvfrstpi.b $xr24, $xr28, 31 +# CHECK-INST: xvfrstpi.b $xr24, $xr28, 31 +# CHECK-ENCODING: encoding: [0x98,0x7f,0x9a,0x76] + +xvfrstpi.h $xr22, $xr24, 18 +# CHECK-INST: xvfrstpi.h $xr22, $xr24, 18 +# CHECK-ENCODING: encoding: [0x16,0xcb,0x9a,0x76] diff --git a/llvm/test/MC/LoongArch/lasx/fsqrt.s b/llvm/test/MC/LoongArch/lasx/fsqrt.s new file mode 100644 index 0000000000000000000000000000000000000000..6519ad298cb3257fff0e4d99194131e8c2a3a1a9 --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/fsqrt.s @@ -0,0 +1,12 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvfsqrt.s $xr4, $xr27 +# CHECK-INST: xvfsqrt.s $xr4, $xr27 +# CHECK-ENCODING: encoding: [0x64,0xe7,0x9c,0x76] + +xvfsqrt.d $xr26, $xr2 +# CHECK-INST: xvfsqrt.d $xr26, $xr2 +# CHECK-ENCODING: encoding: [0x5a,0xe8,0x9c,0x76] diff --git a/llvm/test/MC/LoongArch/lasx/fsub.s b/llvm/test/MC/LoongArch/lasx/fsub.s new file mode 100644 index 0000000000000000000000000000000000000000..47330258e74eca62e3e8ed4b5b5bb5c8ede0d0b8 --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/fsub.s @@ -0,0 +1,12 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvfsub.s $xr22, $xr0, $xr3 +# CHECK-INST: xvfsub.s $xr22, $xr0, $xr3 +# CHECK-ENCODING: encoding: [0x16,0x8c,0x32,0x75] + +xvfsub.d $xr4, $xr25, $xr15 +# CHECK-INST: xvfsub.d $xr4, $xr25, $xr15 +# CHECK-ENCODING: encoding: [0x24,0x3f,0x33,0x75] diff --git a/llvm/test/MC/LoongArch/lasx/ftint.s b/llvm/test/MC/LoongArch/lasx/ftint.s new file mode 100644 index 0000000000000000000000000000000000000000..0b263fe201e6f6afdc8fe7aa1cda9b9c2f08a5bd --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/ftint.s @@ -0,0 +1,120 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s 
--check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvftintrne.w.s $xr20, $xr13 +# CHECK-INST: xvftintrne.w.s $xr20, $xr13 +# CHECK-ENCODING: encoding: [0xb4,0x51,0x9e,0x76] + +xvftintrne.l.d $xr30, $xr14 +# CHECK-INST: xvftintrne.l.d $xr30, $xr14 +# CHECK-ENCODING: encoding: [0xde,0x55,0x9e,0x76] + +xvftintrz.w.s $xr14, $xr5 +# CHECK-INST: xvftintrz.w.s $xr14, $xr5 +# CHECK-ENCODING: encoding: [0xae,0x48,0x9e,0x76] + +xvftintrz.l.d $xr1, $xr26 +# CHECK-INST: xvftintrz.l.d $xr1, $xr26 +# CHECK-ENCODING: encoding: [0x41,0x4f,0x9e,0x76] + +xvftintrp.w.s $xr18, $xr1 +# CHECK-INST: xvftintrp.w.s $xr18, $xr1 +# CHECK-ENCODING: encoding: [0x32,0x40,0x9e,0x76] + +xvftintrp.l.d $xr10, $xr24 +# CHECK-INST: xvftintrp.l.d $xr10, $xr24 +# CHECK-ENCODING: encoding: [0x0a,0x47,0x9e,0x76] + +xvftintrm.w.s $xr8, $xr23 +# CHECK-INST: xvftintrm.w.s $xr8, $xr23 +# CHECK-ENCODING: encoding: [0xe8,0x3a,0x9e,0x76] + +xvftintrm.l.d $xr12, $xr17 +# CHECK-INST: xvftintrm.l.d $xr12, $xr17 +# CHECK-ENCODING: encoding: [0x2c,0x3e,0x9e,0x76] + +xvftint.w.s $xr11, $xr25 +# CHECK-INST: xvftint.w.s $xr11, $xr25 +# CHECK-ENCODING: encoding: [0x2b,0x33,0x9e,0x76] + +xvftint.l.d $xr7, $xr22 +# CHECK-INST: xvftint.l.d $xr7, $xr22 +# CHECK-ENCODING: encoding: [0xc7,0x36,0x9e,0x76] + +xvftintrz.wu.s $xr13, $xr19 +# CHECK-INST: xvftintrz.wu.s $xr13, $xr19 +# CHECK-ENCODING: encoding: [0x6d,0x72,0x9e,0x76] + +xvftintrz.lu.d $xr24, $xr3 +# CHECK-INST: xvftintrz.lu.d $xr24, $xr3 +# CHECK-ENCODING: encoding: [0x78,0x74,0x9e,0x76] + +xvftint.wu.s $xr14, $xr6 +# CHECK-INST: xvftint.wu.s $xr14, $xr6 +# CHECK-ENCODING: encoding: [0xce,0x58,0x9e,0x76] + +xvftint.lu.d $xr2, $xr2 +# CHECK-INST: xvftint.lu.d $xr2, $xr2 +# CHECK-ENCODING: encoding: [0x42,0x5c,0x9e,0x76] + +xvftintrne.w.d $xr13, $xr20, $xr5 +# CHECK-INST: xvftintrne.w.d $xr13, $xr20, $xr5 +# CHECK-ENCODING: encoding: [0x8d,0x96,0x4b,0x75] + +xvftintrz.w.d $xr13, $xr8, $xr27 +# CHECK-INST: xvftintrz.w.d $xr13, $xr8, $xr27 +# CHECK-ENCODING: encoding: [0x0d,0x6d,0x4b,0x75] + +xvftintrp.w.d $xr14, $xr26, $xr31 +# CHECK-INST: xvftintrp.w.d $xr14, $xr26, $xr31 +# CHECK-ENCODING: encoding: [0x4e,0xff,0x4a,0x75] + +xvftintrm.w.d $xr29, $xr23, $xr7 +# CHECK-INST: xvftintrm.w.d $xr29, $xr23, $xr7 +# CHECK-ENCODING: encoding: [0xfd,0x1e,0x4a,0x75] + +xvftint.w.d $xr7, $xr22, $xr29 +# CHECK-INST: xvftint.w.d $xr7, $xr22, $xr29 +# CHECK-ENCODING: encoding: [0xc7,0xf6,0x49,0x75] + +xvftintrnel.l.s $xr31, $xr28 +# CHECK-INST: xvftintrnel.l.s $xr31, $xr28 +# CHECK-ENCODING: encoding: [0x9f,0xa3,0x9e,0x76] + +xvftintrneh.l.s $xr16, $xr29 +# CHECK-INST: xvftintrneh.l.s $xr16, $xr29 +# CHECK-ENCODING: encoding: [0xb0,0xa7,0x9e,0x76] + +xvftintrzl.l.s $xr27, $xr29 +# CHECK-INST: xvftintrzl.l.s $xr27, $xr29 +# CHECK-ENCODING: encoding: [0xbb,0x9b,0x9e,0x76] + +xvftintrzh.l.s $xr14, $xr10 +# CHECK-INST: xvftintrzh.l.s $xr14, $xr10 +# CHECK-ENCODING: encoding: [0x4e,0x9d,0x9e,0x76] + +xvftintrpl.l.s $xr14, $xr0 +# CHECK-INST: xvftintrpl.l.s $xr14, $xr0 +# CHECK-ENCODING: encoding: [0x0e,0x90,0x9e,0x76] + +xvftintrph.l.s $xr23, $xr0 +# CHECK-INST: xvftintrph.l.s $xr23, $xr0 +# CHECK-ENCODING: encoding: [0x17,0x94,0x9e,0x76] + +xvftintrml.l.s $xr22, $xr15 +# CHECK-INST: xvftintrml.l.s $xr22, $xr15 +# CHECK-ENCODING: encoding: [0xf6,0x89,0x9e,0x76] + +xvftintrmh.l.s $xr10, $xr19 +# CHECK-INST: xvftintrmh.l.s $xr10, $xr19 +# CHECK-ENCODING: encoding: [0x6a,0x8e,0x9e,0x76] + 
+xvftintl.l.s $xr31, $xr11 +# CHECK-INST: xvftintl.l.s $xr31, $xr11 +# CHECK-ENCODING: encoding: [0x7f,0x81,0x9e,0x76] + +xvftinth.l.s $xr15, $xr5 +# CHECK-INST: xvftinth.l.s $xr15, $xr5 +# CHECK-ENCODING: encoding: [0xaf,0x84,0x9e,0x76] diff --git a/llvm/test/MC/LoongArch/lasx/haddw.s b/llvm/test/MC/LoongArch/lasx/haddw.s new file mode 100644 index 0000000000000000000000000000000000000000..639669e510013da9f91f842ba8f068cdd26d0aaf --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/haddw.s @@ -0,0 +1,36 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvhaddw.h.b $xr31, $xr19, $xr29 +# CHECK-INST: xvhaddw.h.b $xr31, $xr19, $xr29 +# CHECK-ENCODING: encoding: [0x7f,0x76,0x54,0x74] + +xvhaddw.w.h $xr31, $xr16, $xr23 +# CHECK-INST: xvhaddw.w.h $xr31, $xr16, $xr23 +# CHECK-ENCODING: encoding: [0x1f,0xde,0x54,0x74] + +xvhaddw.d.w $xr30, $xr1, $xr24 +# CHECK-INST: xvhaddw.d.w $xr30, $xr1, $xr24 +# CHECK-ENCODING: encoding: [0x3e,0x60,0x55,0x74] + +xvhaddw.q.d $xr16, $xr15, $xr17 +# CHECK-INST: xvhaddw.q.d $xr16, $xr15, $xr17 +# CHECK-ENCODING: encoding: [0xf0,0xc5,0x55,0x74] + +xvhaddw.hu.bu $xr14, $xr17, $xr2 +# CHECK-INST: xvhaddw.hu.bu $xr14, $xr17, $xr2 +# CHECK-ENCODING: encoding: [0x2e,0x0a,0x58,0x74] + +xvhaddw.wu.hu $xr21, $xr2, $xr8 +# CHECK-INST: xvhaddw.wu.hu $xr21, $xr2, $xr8 +# CHECK-ENCODING: encoding: [0x55,0xa0,0x58,0x74] + +xvhaddw.du.wu $xr6, $xr24, $xr19 +# CHECK-INST: xvhaddw.du.wu $xr6, $xr24, $xr19 +# CHECK-ENCODING: encoding: [0x06,0x4f,0x59,0x74] + +xvhaddw.qu.du $xr10, $xr12, $xr13 +# CHECK-INST: xvhaddw.qu.du $xr10, $xr12, $xr13 +# CHECK-ENCODING: encoding: [0x8a,0xb5,0x59,0x74] diff --git a/llvm/test/MC/LoongArch/lasx/hsubw.s b/llvm/test/MC/LoongArch/lasx/hsubw.s new file mode 100644 index 0000000000000000000000000000000000000000..078812f33e0d3201b2d97547fb56107982fac104 --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/hsubw.s @@ -0,0 +1,36 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvhsubw.h.b $xr22, $xr7, $xr16 +# CHECK-INST: xvhsubw.h.b $xr22, $xr7, $xr16 +# CHECK-ENCODING: encoding: [0xf6,0x40,0x56,0x74] + +xvhsubw.w.h $xr19, $xr8, $xr15 +# CHECK-INST: xvhsubw.w.h $xr19, $xr8, $xr15 +# CHECK-ENCODING: encoding: [0x13,0xbd,0x56,0x74] + +xvhsubw.d.w $xr30, $xr23, $xr19 +# CHECK-INST: xvhsubw.d.w $xr30, $xr23, $xr19 +# CHECK-ENCODING: encoding: [0xfe,0x4e,0x57,0x74] + +xvhsubw.q.d $xr20, $xr13, $xr28 +# CHECK-INST: xvhsubw.q.d $xr20, $xr13, $xr28 +# CHECK-ENCODING: encoding: [0xb4,0xf1,0x57,0x74] + +xvhsubw.hu.bu $xr10, $xr2, $xr16 +# CHECK-INST: xvhsubw.hu.bu $xr10, $xr2, $xr16 +# CHECK-ENCODING: encoding: [0x4a,0x40,0x5a,0x74] + +xvhsubw.wu.hu $xr1, $xr26, $xr18 +# CHECK-INST: xvhsubw.wu.hu $xr1, $xr26, $xr18 +# CHECK-ENCODING: encoding: [0x41,0xcb,0x5a,0x74] + +xvhsubw.du.wu $xr5, $xr23, $xr20 +# CHECK-INST: xvhsubw.du.wu $xr5, $xr23, $xr20 +# CHECK-ENCODING: encoding: [0xe5,0x52,0x5b,0x74] + +xvhsubw.qu.du $xr31, $xr4, $xr8 +# CHECK-INST: xvhsubw.qu.du $xr31, $xr4, $xr8 +# CHECK-ENCODING: encoding: [0x9f,0xa0,0x5b,0x74] diff --git a/llvm/test/MC/LoongArch/lasx/ilv.s b/llvm/test/MC/LoongArch/lasx/ilv.s new file mode 100644 index 
0000000000000000000000000000000000000000..ebdc8d8518555ed2e8b6867224c7b7f745596e93 --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/ilv.s @@ -0,0 +1,36 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvilvl.b $xr29, $xr14, $xr0 +# CHECK-INST: xvilvl.b $xr29, $xr14, $xr0 +# CHECK-ENCODING: encoding: [0xdd,0x01,0x1a,0x75] + +xvilvl.h $xr30, $xr9, $xr21 +# CHECK-INST: xvilvl.h $xr30, $xr9, $xr21 +# CHECK-ENCODING: encoding: [0x3e,0xd5,0x1a,0x75] + +xvilvl.w $xr24, $xr22, $xr9 +# CHECK-INST: xvilvl.w $xr24, $xr22, $xr9 +# CHECK-ENCODING: encoding: [0xd8,0x26,0x1b,0x75] + +xvilvl.d $xr25, $xr20, $xr10 +# CHECK-INST: xvilvl.d $xr25, $xr20, $xr10 +# CHECK-ENCODING: encoding: [0x99,0xaa,0x1b,0x75] + +xvilvh.b $xr19, $xr22, $xr26 +# CHECK-INST: xvilvh.b $xr19, $xr22, $xr26 +# CHECK-ENCODING: encoding: [0xd3,0x6a,0x1c,0x75] + +xvilvh.h $xr10, $xr23, $xr7 +# CHECK-INST: xvilvh.h $xr10, $xr23, $xr7 +# CHECK-ENCODING: encoding: [0xea,0x9e,0x1c,0x75] + +xvilvh.w $xr5, $xr0, $xr30 +# CHECK-INST: xvilvh.w $xr5, $xr0, $xr30 +# CHECK-ENCODING: encoding: [0x05,0x78,0x1d,0x75] + +xvilvh.d $xr24, $xr2, $xr2 +# CHECK-INST: xvilvh.d $xr24, $xr2, $xr2 +# CHECK-ENCODING: encoding: [0x58,0x88,0x1d,0x75] diff --git a/llvm/test/MC/LoongArch/lasx/insgr2vr.s b/llvm/test/MC/LoongArch/lasx/insgr2vr.s new file mode 100644 index 0000000000000000000000000000000000000000..8c23c1543778e8e982b6117d401116d886a1167b --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/insgr2vr.s @@ -0,0 +1,12 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvinsgr2vr.w $xr25, $r30, 7 +# CHECK-INST: xvinsgr2vr.w $xr25, $s7, 7 +# CHECK-ENCODING: encoding: [0xd9,0xdf,0xeb,0x76] + +xvinsgr2vr.d $xr27, $r21, 1 +# CHECK-INST: xvinsgr2vr.d $xr27, $r21, 1 +# CHECK-ENCODING: encoding: [0xbb,0xe6,0xeb,0x76] diff --git a/llvm/test/MC/LoongArch/lasx/insve0.s b/llvm/test/MC/LoongArch/lasx/insve0.s new file mode 100644 index 0000000000000000000000000000000000000000..5b77a23a21ff0f595325843438b1ee07f09347a7 --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/insve0.s @@ -0,0 +1,12 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvinsve0.w $xr6, $xr1, 7 +# CHECK-INST: xvinsve0.w $xr6, $xr1, 7 +# CHECK-ENCODING: encoding: [0x26,0xdc,0xff,0x76] + +xvinsve0.d $xr28, $xr1, 0 +# CHECK-INST: xvinsve0.d $xr28, $xr1, 0 +# CHECK-ENCODING: encoding: [0x3c,0xe0,0xff,0x76] diff --git a/llvm/test/MC/LoongArch/lasx/invalid-imm.s b/llvm/test/MC/LoongArch/lasx/invalid-imm.s new file mode 100644 index 0000000000000000000000000000000000000000..6f64a6f87802be967a1444c41fade9db1dec5484 --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/invalid-imm.s @@ -0,0 +1,1192 @@ +## Test out-of-range immediates used by LASX instructions.
+
+# RUN: not llvm-mc --triple=loongarch64 %s 2>&1 | FileCheck %s
+
+## uimm1
+xvrepl128vei.d $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 1]
+
+xvrepl128vei.d $xr0, $xr1, 2
+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 1]
+
+## uimm2
+xvpickve.d $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 3]
+
+xvpickve.d $xr0, $xr1, 4
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 3]
+
+xvinsve0.d $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 3]
+
+xvinsve0.d $xr0, $xr1, 4
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 3]
+
+xvinsgr2vr.d $xr0, $a0, -1
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 3]
+
+xvinsgr2vr.d $xr0, $a0, 4
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 3]
+
+xvpickve2gr.d $a0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 3]
+
+xvpickve2gr.d $a0, $xr1, 4
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 3]
+
+xvpickve2gr.du $a0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 3]
+
+xvpickve2gr.du $a0, $xr1, 4
+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 3]
+
+xvstelm.d $xr0, $a0, 8, -1
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 3]
+
+xvstelm.d $xr0, $a0, 8, 4
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 3]
+
+xvrepl128vei.w $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 3]
+
+xvrepl128vei.w $xr0, $xr1, 4
+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 3]
+
+## uimm3
+xvpickve.w $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 7]
+
+xvpickve.w $xr0, $xr1, 8
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 7]
+
+xvinsve0.w $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 7]
+
+xvinsve0.w $xr0, $xr1, 8
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 7]
+
+xvinsgr2vr.w $xr0, $a0, -1
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 7]
+
+xvinsgr2vr.w $xr0, $a0, 8
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 7]
+
+xvpickve2gr.wu $a0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 7]
+
+xvpickve2gr.wu $a0, $xr1, 8
+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 7]
+
+xvpickve2gr.w $a0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 7]
+
+xvpickve2gr.w $a0, $xr1, 8
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 7]
+
+xvstelm.w $xr0, $a0, 4, -1
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 7]
+
+xvstelm.w $xr0, $a0, 4, 8
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 7]
+
+xvrepl128vei.h $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 7]
+
+xvrepl128vei.h $xr0, $xr1, 8
+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 7]
+
+xvbitrevi.b $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 7]
+
+xvbitrevi.b $xr0, $xr1, 8
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 7]
+
+xvbitseti.b $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 7]
+
+xvbitseti.b $xr0, $xr1, 8
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 7]
+
+xvbitclri.b $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 7]
+
+xvbitclri.b $xr0, $xr1, 8
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 7]
+
+xvsrari.b $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 7]
+
+xvsrari.b $xr0, $xr1, 8
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 7]
+
+xvsrlri.b $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 7]
+
+xvsrlri.b $xr0, $xr1, 8
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 7]
+
+xvsllwil.hu.bu $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 7]
+
+xvsllwil.hu.bu $xr0, $xr1, 8
+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 7]
+
+xvsllwil.h.b $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 7]
+
+xvsllwil.h.b $xr0, $xr1, 8
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 7]
+
+xvrotri.b $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 7]
+
+xvrotri.b $xr0, $xr1, 8
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 7]
+
+xvsrai.b $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 7]
+
+xvsrai.b $xr0, $xr1, 8
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 7]
+
+xvsrli.b $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 7]
+
+xvsrli.b $xr0, $xr1, 8
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 7]
+
+xvslli.b $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 7]
+
+xvslli.b $xr0, $xr1, 8
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 7]
+
+xvsat.b $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 7]
+
+xvsat.b $xr0, $xr1, 8
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 7]
+
+xvsat.bu $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 7]
+
+xvsat.bu $xr0, $xr1, 8
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 7]
+
+## uimm4
+xvstelm.h $xr0, $a0, 2, -1
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15]
+
+xvstelm.h $xr0, $a0, 2, 16
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15]
+
+xvrepl128vei.b $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 15]
+
+xvrepl128vei.b $xr0, $xr1, 16
+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 15]
+
+xvbitrevi.h $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15]
+
+xvbitrevi.h $xr0, $xr1, 16
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15]
+
+xvbitseti.h $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15]
+
+xvbitseti.h $xr0, $xr1, 16
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15]
+
+xvbitclri.h $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15]
+
+xvbitclri.h $xr0, $xr1, 16
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15]
+
+xvssrarni.bu.h $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 15]
+
+xvssrarni.bu.h $xr0, $xr1, 16
+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 15]
+
+xvssrlrni.bu.h $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 15]
+
+xvssrlrni.bu.h $xr0, $xr1, 16
+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 15]
+
+xvssrarni.b.h $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 15]
+
+xvssrarni.b.h $xr0, $xr1, 16
+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 15]
+
+xvssrlrni.b.h $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 15]
+
+xvssrlrni.b.h $xr0, $xr1, 16
+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 15]
+
+xvssrani.bu.h $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 15]
+
+xvssrani.bu.h $xr0, $xr1, 16
+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 15]
+
+xvssrlni.bu.h $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 15]
+
+xvssrlni.bu.h $xr0, $xr1, 16
+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 15]
+
+xvssrani.b.h $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15]
+
+xvssrani.b.h $xr0, $xr1, 16
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15]
+
+xvssrlni.b.h $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15]
+
+xvssrlni.b.h $xr0, $xr1, 16
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15]
+
+xvsrarni.b.h $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15]
+
+xvsrarni.b.h $xr0, $xr1, 16
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15]
+
+xvsrlrni.b.h $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15]
+
+xvsrlrni.b.h $xr0, $xr1, 16
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15]
+
+xvsrani.b.h $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15]
+
+xvsrani.b.h $xr0, $xr1, 16
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15]
+
+xvsrlni.b.h $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15]
+
+xvsrlni.b.h $xr0, $xr1, 16
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15]
+
+xvsrari.h $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 15]
+
+xvsrari.h $xr0, $xr1, 16
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 15]
+
+xvsrlri.h $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 15]
+
+xvsrlri.h $xr0, $xr1, 16
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 15]
+
+xvsllwil.wu.hu $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 15]
+
+xvsllwil.wu.hu $xr0, $xr1, 16
+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 15]
+
+xvsllwil.w.h $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15]
+
+xvsllwil.w.h $xr0, $xr1, 16
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15]
+
+xvrotri.h $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 15]
+
+xvrotri.h $xr0, $xr1, 16
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 15]
+
+xvsrai.h $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 15]
+
+xvsrai.h $xr0, $xr1, 16
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 15]
+
+xvsrli.h $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 15]
+
+xvsrli.h $xr0, $xr1, 16
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 15]
+
+xvslli.h $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 15]
+
+xvslli.h $xr0, $xr1, 16
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 15]
+
+xvsat.h $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 15]
+
+xvsat.h $xr0, $xr1, 16
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 15]
+
+xvsat.hu $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 15]
+
+xvsat.hu $xr0, $xr1, 16
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 15]
+
+## uimm5
+xvstelm.b $xr0, $a0, 1, -1
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31]
+
+xvstelm.b $xr0, $a0, 1, 32
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31]
+
+xvbsrl.v $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31]
+
+xvbsrl.v $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31]
+
+xvbsll.v $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31]
+
+xvbsll.v $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31]
+
+xvslti.du $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvslti.du $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvslti.wu $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvslti.wu $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvslti.hu $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvslti.hu $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvslti.bu $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvslti.bu $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvslei.du $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvslei.du $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvslei.wu $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvslei.wu $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvslei.hu $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvslei.hu $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvslei.bu $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvslei.bu $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvfrstpi.h $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 31]
+
+xvfrstpi.h $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 31]
+
+xvfrstpi.b $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 31]
+
+xvfrstpi.b $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 31]
+
+xvbitrevi.w $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31]
+
+xvbitrevi.w $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31]
+
+xvbitseti.w $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31]
+
+xvbitseti.w $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31]
+
+xvbitclri.w $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31]
+
+xvbitclri.w $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31]
+
+xvssrarni.hu.w $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 31]
+
+xvssrarni.hu.w $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 31]
+
+xvssrlrni.hu.w $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 31]
+
+xvssrlrni.hu.w $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 31]
+
+xvssrarni.h.w $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 31]
+
+xvssrarni.h.w $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 31]
+
+xvssrlrni.h.w $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 31]
+
+xvssrlrni.h.w $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 31]
+
+xvssrani.hu.w $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 31]
+
+xvssrani.hu.w $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 31]
+
+xvssrlni.hu.w $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 31]
+
+xvssrlni.hu.w $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 31]
+
+xvssrani.h.w $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31]
+
+xvssrani.h.w $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31]
+
+xvssrlni.h.w $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31]
+
+xvssrlni.h.w $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31]
+
+xvsrarni.h.w $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31]
+
+xvsrarni.h.w $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31]
+
+xvsrlrni.h.w $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31]
+
+xvsrlrni.h.w $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31]
+
+xvsrani.h.w $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31]
+
+xvsrani.h.w $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31]
+
+xvsrlni.h.w $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31]
+
+xvsrlni.h.w $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31]
+
+xvsrari.w $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvsrari.w $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvsrlri.w $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvsrlri.w $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvsllwil.du.wu $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 31]
+
+xvsllwil.du.wu $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 31]
+
+xvsllwil.d.w $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31]
+
+xvsllwil.d.w $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31]
+
+xvrotri.w $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvrotri.w $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvsrai.w $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31]
+
+xvsrai.w $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31]
+
+xvsrli.w $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31]
+
+xvsrli.w $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31]
+
+xvslli.w $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31]
+
+xvslli.w $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31]
+
+xvaddi.bu $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvaddi.bu $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvaddi.hu $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvaddi.hu $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvaddi.wu $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvaddi.wu $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvaddi.du $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvaddi.du $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvsubi.bu $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvsubi.bu $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvsubi.hu $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvsubi.hu $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvsubi.wu $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvsubi.wu $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvsubi.du $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvsubi.du $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvmaxi.bu $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvmaxi.bu $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvmaxi.hu $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvmaxi.hu $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvmaxi.wu $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvmaxi.wu $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvmaxi.du $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvmaxi.du $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvmini.bu $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvmini.bu $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvmini.hu $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvmini.hu $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvmini.wu $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvmini.wu $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvmini.du $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvmini.du $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31]
+
+xvsat.w $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 31]
+
+xvsat.w $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 31]
+
+xvsat.wu $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31]
+
+xvsat.wu $xr0, $xr1, 32
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31]
+
+## simm5
+xvslti.d $xr0, $xr1, -17
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15]
+
+xvslti.d $xr0, $xr1, 16
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15]
+
+xvslti.w $xr0, $xr1, -17
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15]
+
+xvslti.w $xr0, $xr1, 16
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15]
+
+xvslti.h $xr0, $xr1, -17
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15]
+
+xvslti.h $xr0, $xr1, 16
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15]
+
+xvslti.b $xr0, $xr1, -17
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15]
+
+xvslti.b $xr0, $xr1, 16
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15]
+
+xvslei.d $xr0, $xr1, -17
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15]
+
+xvslei.d $xr0, $xr1, 16
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15]
+
+xvslei.w $xr0, $xr1, -17
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15]
+
+xvslei.w $xr0, $xr1, 16
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15]
+
+xvslei.h $xr0, $xr1, -17
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15]
+
+xvslei.h $xr0, $xr1, 16
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15]
+
+xvslei.b $xr0, $xr1, -17
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15]
+
+xvslei.b $xr0, $xr1, 16
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15]
+
+xvseqi.d $xr0, $xr1, -17
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15]
+
+xvseqi.d $xr0, $xr1, 16
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15]
+
+xvseqi.w $xr0, $xr1, -17
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15]
+
+xvseqi.w $xr0, $xr1, 16
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15]
+
+xvseqi.h $xr0, $xr1, -17
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15]
+
+xvseqi.h $xr0, $xr1, 16
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15]
+
+xvseqi.b $xr0, $xr1, -17
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15]
+
+xvseqi.b $xr0, $xr1, 16
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15]
+
+xvmaxi.b $xr0, $xr1, -17
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15]
+
+xvmaxi.b $xr0, $xr1, 16
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15]
+
+xvmaxi.h $xr0, $xr1, -17
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15]
+
+xvmaxi.h $xr0, $xr1, 16
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15]
+
+xvmaxi.w $xr0, $xr1, -17
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15]
+
+xvmaxi.w $xr0, $xr1, 16
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15]
+
+xvmaxi.d $xr0, $xr1, -17
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15]
+
+xvmaxi.d $xr0, $xr1, 16
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15]
+
+xvmini.b $xr0, $xr1, -17
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15]
+
+xvmini.b $xr0, $xr1, 16
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15]
+
+xvmini.h $xr0, $xr1, -17
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15]
+
+xvmini.h $xr0, $xr1, 16
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15]
+
+xvmini.w $xr0, $xr1, -17
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15]
+
+xvmini.w $xr0, $xr1, 16
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15]
+
+xvmini.d $xr0, $xr1, -17
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15]
+
+xvmini.d $xr0, $xr1, 16
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-16, 15]
+
+## uimm6
+xvbitrevi.d $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63]
+
+xvbitrevi.d $xr0, $xr1, 64
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63]
+
+xvbitseti.d $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63]
+
+xvbitseti.d $xr0, $xr1, 64
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63]
+
+xvbitclri.d $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63]
+
+xvbitclri.d $xr0, $xr1, 64
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63]
+
+xvssrarni.wu.d $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 63]
+
+xvssrarni.wu.d $xr0, $xr1, 64
+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 63]
+
+xvssrlrni.wu.d $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 63]
+
+xvssrlrni.wu.d $xr0, $xr1, 64
+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 63]
+
+xvssrarni.w.d $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 63]
+
+xvssrarni.w.d $xr0, $xr1, 64
+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 63]
+
+xvssrlrni.w.d $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 63]
+
+xvssrlrni.w.d $xr0, $xr1, 64
+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 63]
+
+xvssrani.wu.d $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 63]
+
+xvssrani.wu.d $xr0, $xr1, 64
+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 63]
+
+xvssrlni.wu.d $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 63]
+
+xvssrlni.wu.d $xr0, $xr1, 64
+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 63]
+
+xvssrani.w.d $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 63]
+
+xvssrani.w.d $xr0, $xr1, 64
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 63]
+
+xvssrlni.w.d $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 63]
+
+xvssrlni.w.d $xr0, $xr1, 64
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 63]
+
+xvsrarni.w.d $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 63]
+
+xvsrarni.w.d $xr0, $xr1, 64
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 63]
+
+xvsrlrni.w.d $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 63]
+
+xvsrlrni.w.d $xr0, $xr1, 64
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 63]
+
+xvsrani.w.d $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63]
+
+xvsrani.w.d $xr0, $xr1, 64
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63]
+
+xvsrlni.w.d $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63]
+
+xvsrlni.w.d $xr0, $xr1, 64
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63]
+
+xvsrari.d $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 63]
+
+xvsrari.d $xr0, $xr1, 64
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 63]
+
+xvsrlri.d $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 63]
+
+xvsrlri.d $xr0, $xr1, 64
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 63]
+
+xvrotri.d $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 63]
+
+xvrotri.d $xr0, $xr1, 64
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 63]
+
+xvsrai.d $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 63]
+
+xvsrai.d $xr0, $xr1, 64
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 63]
+
+xvsrli.d $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 63]
+
+xvsrli.d $xr0, $xr1, 64
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 63]
+
+xvslli.d $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 63]
+
+xvslli.d $xr0, $xr1, 64
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 63]
+
+xvsat.d $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 63]
+
+xvsat.d $xr0, $xr1, 64
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 63]
+
+xvsat.du $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 63]
+
+xvsat.du $xr0, $xr1, 64
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 63]
+
+## uimm7
+xvssrarni.du.q $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 127]
+
+xvssrarni.du.q $xr0, $xr1, 128
+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 127]
+
+xvssrlrni.du.q $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 127]
+
+xvssrlrni.du.q $xr0, $xr1, 128
+# CHECK: :[[#@LINE-1]]:28: error: immediate must be an integer in the range [0, 127]
+
+xvssrarni.d.q $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 127]
+
+xvssrarni.d.q $xr0, $xr1, 128
+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 127]
+
+xvssrlrni.d.q $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 127]
+
+xvssrlrni.d.q $xr0, $xr1, 128
+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 127]
+
+xvssrani.du.q $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 127]
+
+xvssrani.du.q $xr0, $xr1, 128
+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 127]
+
+xvssrlni.du.q $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 127]
+
+xvssrlni.du.q $xr0, $xr1, 128
+# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 127]
+
+xvssrani.d.q $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 127]
+
+xvssrani.d.q $xr0, $xr1, 128
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 127]
+
+xvssrlni.d.q $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 127]
+
+xvssrlni.d.q $xr0, $xr1, 128
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 127]
+
+xvsrarni.d.q $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 127]
+
+xvsrarni.d.q $xr0, $xr1, 128
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 127]
+
+xvsrlrni.d.q $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 127]
+
+xvsrlrni.d.q $xr0, $xr1, 128
+# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 127]
+
+xvsrani.d.q $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 127]
+
+xvsrani.d.q $xr0, $xr1, 128
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 127]
+
+xvsrlni.d.q $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 127]
+
+xvsrlni.d.q $xr0, $xr1, 128
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 127]
+
+## uimm8
+xvextrins.d $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 255]
+
+xvextrins.d $xr0, $xr1, 256
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 255]
+
+xvextrins.w $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 255]
+
+xvextrins.w $xr0, $xr1, 256
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 255]
+
+xvextrins.h $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 255]
+
+xvextrins.h $xr0, $xr1, 256
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 255]
+
+xvextrins.b $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 255]
+
+xvextrins.b $xr0, $xr1, 256
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 255]
+
+xvpermi.q $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 255]
+
+xvpermi.q $xr0, $xr1, 256
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 255]
+
+xvpermi.d $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 255]
+
+xvpermi.d $xr0, $xr1, 256
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 255]
+
+xvpermi.w $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 255]
+
+xvpermi.w $xr0, $xr1, 256
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 255]
+
+xvshuf4i.d $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255]
+
+xvshuf4i.d $xr0, $xr1, 256
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255]
+
+xvshuf4i.w $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255]
+
+xvshuf4i.w $xr0, $xr1, 256
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255]
+
+xvshuf4i.h $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255]
+
+xvshuf4i.h $xr0, $xr1, 256
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255]
+
+xvshuf4i.b $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255]
+
+xvshuf4i.b $xr0, $xr1, 256
+# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255]
+
+xvbitseli.b $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 255]
+
+xvbitseli.b $xr0, $xr1, 256
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 255]
+
+xvandi.b $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 255]
+
+xvandi.b $xr0, $xr1, 256
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 255]
+
+xvori.b $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 255]
+
+xvori.b $xr0, $xr1, 256
+# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 255]
+
+xvxori.b $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 255]
+
+xvxori.b $xr0, $xr1, 256
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 255]
+
+xvnori.b $xr0, $xr1, -1
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 255]
+
+xvnori.b $xr0, $xr1, 256
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 255]
+
+## simm8
+xvstelm.b $xr0, $a0, -129, 1
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-128, 127]
+
+xvstelm.b $xr0, $a0, 128, 1
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-128, 127]
+
+## simm8_lsl1
+xvstelm.h $xr0, $a0, -258, 1
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be a multiple of 2 in the range [-256, 254]
+
+xvstelm.h $xr0, $a0, 256, 1
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be a multiple of 2 in the range [-256, 254]
+
+## simm8_lsl2
+xvstelm.w $xr0, $a0, -516, 1
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be a multiple of 4 in the range [-512, 508]
+
+xvstelm.w $xr0, $a0, 512, 1
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be a multiple of 4 in the range [-512, 508]
+
+## simm8_lsl3
+xvstelm.d $xr0, $a0, -1032, 1
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be a multiple of 8 in the range [-1024, 1016]
+
+xvstelm.d $xr0, $a0, 1024, 1
+# CHECK: :[[#@LINE-1]]:22: error: immediate must be a multiple of 8 in the range [-1024, 1016]
+
+## simm9_lsl3
+xvldrepl.d $xr0, $a0, -2056
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be a multiple of 8 in the range [-2048, 2040]
+
+xvldrepl.d $xr0, $a0, 2048
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be a multiple of 8 in the range [-2048, 2040]
+
+## simm10_lsl2
+xvldrepl.w $xr0, $a0, -2052
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be a multiple of 4 in the range [-2048, 2044]
+
+xvldrepl.w $xr0, $a0, 2048
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be a multiple of 4 in the range [-2048, 2044]
+
+## simm10
+xvrepli.b $xr0, -513
+# CHECK: :[[#@LINE-1]]:17: error: immediate must be an integer in the range [-512, 511]
+
+xvrepli.b $xr0, 512
+# CHECK: :[[#@LINE-1]]:17: error: immediate must be an integer in the range [-512, 511]
+
+xvrepli.h $xr0, -513
+# CHECK: :[[#@LINE-1]]:17: error: immediate must be an integer in the range [-512, 511]
+
+xvrepli.h $xr0, 512
+# CHECK: :[[#@LINE-1]]:17: error: immediate must be an integer in the range [-512, 511]
+
+xvrepli.w $xr0, -513
+# CHECK: :[[#@LINE-1]]:17: error: immediate must be an integer in the range [-512, 511]
+
+xvrepli.w $xr0, 512
+# CHECK: :[[#@LINE-1]]:17: error: immediate must be an integer in the range [-512, 511]
+
+xvrepli.d $xr0, -513
+# CHECK: :[[#@LINE-1]]:17: error: immediate must be an integer in the range [-512, 511]
+
+xvrepli.d $xr0, 512
+# CHECK: :[[#@LINE-1]]:17: error: immediate must be an integer in the range [-512, 511]
+
+## simm11_lsl1
+xvldrepl.h $xr0, $a0, -2050
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be a multiple of 2 in the range [-2048, 2046]
+
+xvldrepl.h $xr0, $a0, 2048
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be a multiple of 2 in the range [-2048, 2046]
+
+## simm12
+xvldrepl.b $xr0, $a0, -2049
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [-2048, 2047]
+
+xvldrepl.b $xr0, $a0, 2048
+# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [-2048, 2047]
+
+xvst $xr0, $a0, -2049
+# CHECK: :[[#@LINE-1]]:17: error: immediate must be an integer in the range [-2048, 2047]
+
+xvst $xr0, $a0, 2048
+# CHECK: :[[#@LINE-1]]:17: error: immediate must be an integer in the range [-2048, 2047]
+
+xvld $xr0, $a0, -2049
+# CHECK: :[[#@LINE-1]]:17: error: immediate must be an integer in the range [-2048, 2047]
+
+xvld $xr0, $a0, 2048
+# CHECK: :[[#@LINE-1]]:17: error: immediate must be an integer in the range [-2048, 2047]
+
+## simm13
+xvldi $xr0, -4097
+# CHECK: :[[#@LINE-1]]:13: error: immediate must be an integer in the range [-4096, 4095]
+
+xvldi $xr0, 4096
+# CHECK: :[[#@LINE-1]]:13: error: immediate must be an integer in the range [-4096, 4095]
diff --git a/llvm/test/MC/LoongArch/lasx/ld.s b/llvm/test/MC/LoongArch/lasx/ld.s
new file mode 100644
index 0000000000000000000000000000000000000000..70db8d4f7f42f361c55fdf4c227f34cbb0184ef5
--- /dev/null
+++ b/llvm/test/MC/LoongArch/lasx/ld.s
@@ -0,0 +1,12 @@
+# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \
+# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \
+# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST
+
+xvld $xr3, $r3, -658
+# CHECK-INST: xvld $xr3, $sp, -658
+# CHECK-ENCODING: encoding: [0x63,0xb8,0xb5,0x2c]
+
+xvldx $xr23, $r9, $r14
+# CHECK-INST: xvldx $xr23, $a5, $t2
+# CHECK-ENCODING: encoding: [0x37,0x39,0x48,0x38]
diff --git a/llvm/test/MC/LoongArch/lasx/ldi.s b/llvm/test/MC/LoongArch/lasx/ldi.s
new file mode 100644
index 0000000000000000000000000000000000000000..5b4bd2dcd2dcf86a13c273f449151c41cc1f6fae
--- /dev/null
+++ b/llvm/test/MC/LoongArch/lasx/ldi.s
@@ -0,0 +1,29 @@
+# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \
+# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \
+# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-OBJ
+
+xvldi $xr31, 3206
+# CHECK-INST: xvldi $xr31, 3206
+# CHECK-ENCODING: encoding: [0xdf,0x90,0xe1,0x77]
+# CHECK-OBJ: xvldi $xr31, 3206
+
+xvrepli.b $xr26, -512
+# CHECK-INST: xvrepli.b $xr26, -512
+# CHECK-ENCODING: encoding: [0x1a,0x40,0xe0,0x77]
+# CHECK-OBJ: xvldi $xr26, 512
+
+xvrepli.h $xr26, -512
+# CHECK-INST: xvrepli.h $xr26, -512
+# CHECK-ENCODING: encoding: [0x1a,0xc0,0xe0,0x77]
+# CHECK-OBJ: xvldi $xr26, 1536
+
+xvrepli.w $xr26, -512
+# CHECK-INST: xvrepli.w $xr26, -512
+# CHECK-ENCODING: encoding: [0x1a,0x40,0xe1,0x77]
+# CHECK-OBJ: xvldi $xr26, 2560
+
+xvrepli.d $xr26, -512
+# CHECK-INST: xvrepli.d $xr26, -512
+# CHECK-ENCODING: encoding: [0x1a,0xc0,0xe1,0x77]
+# CHECK-OBJ: xvldi $xr26, 3584
diff --git a/llvm/test/MC/LoongArch/lasx/ldrepl.s b/llvm/test/MC/LoongArch/lasx/ldrepl.s
new file mode 100644
index 0000000000000000000000000000000000000000..3fd8ec406dc44048d1bc88930ad1366dbcf46a7d
--- /dev/null
+++ b/llvm/test/MC/LoongArch/lasx/ldrepl.s
@@ -0,0 +1,20 @@
+# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \
+# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \
+# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST
+
+xvldrepl.b $xr19, $r21, 1892
+# CHECK-INST: xvldrepl.b $xr19, $r21, 1892
+# CHECK-ENCODING: encoding: [0xb3,0x92,0x9d,0x32]
+
+xvldrepl.h $xr0, $r17, 1762
+# CHECK-INST: xvldrepl.h $xr0, $t5, 1762
+# CHECK-ENCODING: encoding: [0x20,0xc6,0x4d,0x32]
+
+xvldrepl.w $xr11, $r26, -1524
+# CHECK-INST: xvldrepl.w $xr11, $s3, -1524
+# CHECK-ENCODING: encoding: [0x4b,0x0f,0x2a,0x32]
+
+xvldrepl.d $xr28, $r12, 1976
+# CHECK-INST: xvldrepl.d $xr28, $t0, 1976
+# CHECK-ENCODING: encoding: [0x9c,0xdd,0x13,0x32]
diff --git a/llvm/test/MC/LoongArch/lasx/madd.s b/llvm/test/MC/LoongArch/lasx/madd.s
new file mode 100644
index 0000000000000000000000000000000000000000..8f61793731fca7e2f9266a75bf8c4080d37f0829
--- /dev/null
+++ b/llvm/test/MC/LoongArch/lasx/madd.s
@@ -0,0 +1,20 @@
+# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \
+# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \
+# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST
+
+xvmadd.b $xr5, $xr31, $xr8
+# CHECK-INST: xvmadd.b $xr5, $xr31, $xr8
+# CHECK-ENCODING: encoding: [0xe5,0x23,0xa8,0x74]
+
+xvmadd.h $xr4, $xr0, $xr28
+# CHECK-INST: xvmadd.h $xr4, $xr0, $xr28
+# CHECK-ENCODING: encoding: [0x04,0xf0,0xa8,0x74]
+
+xvmadd.w $xr2, $xr13, $xr24
+# CHECK-INST: xvmadd.w $xr2, $xr13, $xr24
+# CHECK-ENCODING: encoding: [0xa2,0x61,0xa9,0x74]
+
+xvmadd.d $xr19, $xr8, $xr18
+# CHECK-INST: xvmadd.d $xr19, $xr8, $xr18
+# CHECK-ENCODING: encoding: [0x13,0xc9,0xa9,0x74]
diff --git a/llvm/test/MC/LoongArch/lasx/maddw.s b/llvm/test/MC/LoongArch/lasx/maddw.s
new file mode 100644
index 0000000000000000000000000000000000000000..af873fb1ac34939b2a5a37de9476b4682259e44f
--- /dev/null
+++ b/llvm/test/MC/LoongArch/lasx/maddw.s
@@ -0,0 +1,100 @@
+# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \
+# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \
+# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST
+
+xvmaddwev.h.b $xr25, $xr15, $xr9
+# CHECK-INST: xvmaddwev.h.b $xr25, $xr15, $xr9
+# CHECK-ENCODING: encoding: [0xf9,0x25,0xac,0x74]
+
+xvmaddwev.w.h $xr26, $xr1, $xr0
+# CHECK-INST: xvmaddwev.w.h $xr26, $xr1, $xr0
+# CHECK-ENCODING: encoding: [0x3a,0x80,0xac,0x74]
+
+xvmaddwev.d.w $xr23, $xr24, $xr24
+# CHECK-INST: xvmaddwev.d.w $xr23, $xr24, $xr24
+# CHECK-ENCODING: encoding: [0x17,0x63,0xad,0x74]
+
+xvmaddwev.q.d $xr7, $xr9, $xr22
+# CHECK-INST: xvmaddwev.q.d $xr7, $xr9, $xr22
+# CHECK-ENCODING: encoding: [0x27,0xd9,0xad,0x74]
+
+xvmaddwev.h.bu $xr23, $xr13, $xr26
+# CHECK-INST: xvmaddwev.h.bu $xr23, $xr13, $xr26
+# CHECK-ENCODING: encoding: [0xb7,0x69,0xb4,0x74]
+
+xvmaddwev.w.hu $xr13, $xr3, $xr3
+# CHECK-INST: xvmaddwev.w.hu $xr13, $xr3, $xr3
+# CHECK-ENCODING: encoding: [0x6d,0x8c,0xb4,0x74]
+
+xvmaddwev.d.wu $xr29, $xr27, $xr28
+# CHECK-INST: xvmaddwev.d.wu $xr29, $xr27, $xr28
+# CHECK-ENCODING: encoding: [0x7d,0x73,0xb5,0x74]
+
+xvmaddwev.q.du $xr29, $xr10, $xr10
+# CHECK-INST: xvmaddwev.q.du $xr29, $xr10, $xr10
+# CHECK-ENCODING: encoding: [0x5d,0xa9,0xb5,0x74]
+
+xvmaddwev.h.bu.b $xr30, $xr26, $xr31
+# CHECK-INST: xvmaddwev.h.bu.b $xr30, $xr26, $xr31
+# CHECK-ENCODING: encoding: [0x5e,0x7f,0xbc,0x74]
+
+xvmaddwev.w.hu.h $xr6, $xr17, $xr31
+# CHECK-INST: xvmaddwev.w.hu.h $xr6, $xr17, $xr31
+# CHECK-ENCODING: encoding: [0x26,0xfe,0xbc,0x74]
+
+xvmaddwev.d.wu.w $xr10, $xr28, $xr2
+# CHECK-INST: xvmaddwev.d.wu.w $xr10, $xr28, $xr2
+# CHECK-ENCODING: encoding: [0x8a,0x0b,0xbd,0x74]
+
+xvmaddwev.q.du.d $xr16, $xr20, $xr24
+# CHECK-INST: xvmaddwev.q.du.d $xr16, $xr20, $xr24
+# CHECK-ENCODING: encoding: [0x90,0xe2,0xbd,0x74]
+
+xvmaddwod.h.b $xr16, $xr8, $xr18
+# CHECK-INST: xvmaddwod.h.b $xr16, $xr8, $xr18
+# CHECK-ENCODING: encoding: [0x10,0x49,0xae,0x74]
+
+xvmaddwod.w.h $xr11, $xr24, $xr14
+# CHECK-INST: xvmaddwod.w.h $xr11, $xr24, $xr14
+# CHECK-ENCODING: encoding: [0x0b,0xbb,0xae,0x74]
+
+xvmaddwod.d.w $xr0, $xr20, $xr13
+# CHECK-INST: xvmaddwod.d.w $xr0, $xr20, $xr13
+# CHECK-ENCODING: encoding: [0x80,0x36,0xaf,0x74]
+
+xvmaddwod.q.d $xr15, $xr23, $xr18
+# CHECK-INST: xvmaddwod.q.d $xr15, $xr23, $xr18
+# CHECK-ENCODING: encoding: [0xef,0xca,0xaf,0x74]
+
+xvmaddwod.h.bu $xr31, $xr23, $xr7
+# CHECK-INST: xvmaddwod.h.bu $xr31, $xr23, $xr7
+# CHECK-ENCODING: encoding: [0xff,0x1e,0xb6,0x74]
+
+xvmaddwod.w.hu $xr29, $xr16, $xr8
+# CHECK-INST: xvmaddwod.w.hu $xr29, $xr16, $xr8
+# CHECK-ENCODING: encoding: [0x1d,0xa2,0xb6,0x74]
+
+xvmaddwod.d.wu $xr23, $xr16, $xr11
+# CHECK-INST: xvmaddwod.d.wu $xr23, $xr16, $xr11
+# CHECK-ENCODING: encoding: [0x17,0x2e,0xb7,0x74]
+
+xvmaddwod.q.du $xr9, $xr10, $xr19
+# CHECK-INST: xvmaddwod.q.du $xr9, $xr10, $xr19
+# CHECK-ENCODING: encoding: [0x49,0xcd,0xb7,0x74]
+
+xvmaddwod.h.bu.b $xr27, $xr2, $xr11
+# CHECK-INST: xvmaddwod.h.bu.b $xr27, $xr2, $xr11
+# CHECK-ENCODING: encoding: [0x5b,0x2c,0xbe,0x74]
+
+xvmaddwod.w.hu.h $xr12, $xr24, $xr19
+# CHECK-INST: xvmaddwod.w.hu.h $xr12, $xr24, $xr19
+# CHECK-ENCODING: encoding: [0x0c,0xcf,0xbe,0x74]
+
+xvmaddwod.d.wu.w $xr11, $xr0, $xr14
+# CHECK-INST: xvmaddwod.d.wu.w $xr11, $xr0, $xr14
+# CHECK-ENCODING: encoding: [0x0b,0x38,0xbf,0x74]
+
+xvmaddwod.q.du.d $xr29, $xr19, $xr31
+# CHECK-INST: xvmaddwod.q.du.d $xr29, $xr19, $xr31
+# CHECK-ENCODING: encoding: [0x7d,0xfe,0xbf,0x74]
diff --git a/llvm/test/MC/LoongArch/lasx/max.s b/llvm/test/MC/LoongArch/lasx/max.s
new file mode 100644
index 0000000000000000000000000000000000000000..1ae100309c5cf77df42c139878923fcb7aa70259
--- /dev/null
+++ b/llvm/test/MC/LoongArch/lasx/max.s
@@ -0,0 +1,68 @@
+# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \
+# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \
+# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST
+
+xvmax.b $xr23, $xr8, $xr13
+# CHECK-INST: xvmax.b $xr23, $xr8, $xr13
+# CHECK-ENCODING: encoding: [0x17,0x35,0x70,0x74]
+
+xvmax.h $xr13, $xr18, $xr28
+# CHECK-INST: xvmax.h $xr13, $xr18, $xr28
+# CHECK-ENCODING: encoding: [0x4d,0xf2,0x70,0x74]
+
+xvmax.w $xr26, $xr1, $xr2
+# CHECK-INST: xvmax.w $xr26, $xr1, $xr2
+# CHECK-ENCODING: encoding: [0x3a,0x08,0x71,0x74]
+
+xvmax.d $xr2, $xr17, $xr13
+# CHECK-INST: xvmax.d $xr2, $xr17, $xr13
+# CHECK-ENCODING: encoding: [0x22,0xb6,0x71,0x74]
+
+xvmaxi.b $xr6, $xr7, 1
+# CHECK-INST: xvmaxi.b $xr6, $xr7, 1
+# CHECK-ENCODING: encoding: [0xe6,0x04,0x90,0x76]
+
+xvmaxi.h $xr24, $xr10, -7
+# CHECK-INST: xvmaxi.h $xr24, $xr10, -7
+# CHECK-ENCODING: encoding: [0x58,0xe5,0x90,0x76]
+
+xvmaxi.w $xr24, $xr18, -8
+# CHECK-INST: xvmaxi.w $xr24, $xr18, -8
+# CHECK-ENCODING: encoding: [0x58,0x62,0x91,0x76]
+
+xvmaxi.d $xr21, $xr5, -11
+# CHECK-INST: xvmaxi.d $xr21, $xr5, -11
+# CHECK-ENCODING: encoding: [0xb5,0xd4,0x91,0x76]
+
+xvmax.bu $xr29, $xr30, $xr11
+# CHECK-INST: xvmax.bu $xr29, $xr30, $xr11
+# CHECK-ENCODING: encoding: [0xdd,0x2f,0x74,0x74]
+
+xvmax.hu $xr4, $xr23, $xr27
+# CHECK-INST: xvmax.hu $xr4, $xr23, $xr27
+# CHECK-ENCODING: encoding: [0xe4,0xee,0x74,0x74]
+
+xvmax.wu $xr31, $xr0, $xr0
+# CHECK-INST: xvmax.wu $xr31, $xr0, $xr0
+# CHECK-ENCODING: encoding: [0x1f,0x00,0x75,0x74]
+
+xvmax.du $xr5, $xr22, $xr9
+# CHECK-INST: xvmax.du $xr5, $xr22, $xr9
+# CHECK-ENCODING: encoding: [0xc5,0xa6,0x75,0x74]
+
+xvmaxi.bu $xr12, $xr27, 28
+# CHECK-INST: xvmaxi.bu $xr12, $xr27, 28
+# CHECK-ENCODING: encoding: [0x6c,0x73,0x94,0x76]
+
+xvmaxi.hu $xr25, $xr4, 16
+# CHECK-INST: xvmaxi.hu $xr25, $xr4, 16
+# CHECK-ENCODING: encoding: [0x99,0xc0,0x94,0x76]
+
+xvmaxi.wu $xr27, $xr7, 21
+# CHECK-INST: xvmaxi.wu $xr27, $xr7, 21
+# CHECK-ENCODING: encoding: [0xfb,0x54,0x95,0x76]
+
+xvmaxi.du $xr31, $xr13, 9
+# CHECK-INST: xvmaxi.du $xr31, $xr13, 9
+# CHECK-ENCODING: encoding: [0xbf,0xa5,0x95,0x76]
diff --git a/llvm/test/MC/LoongArch/lasx/min.s b/llvm/test/MC/LoongArch/lasx/min.s
new file mode 100644
index 0000000000000000000000000000000000000000..170c111242267f445518404690a0d7afbe3d6d7a
--- /dev/null
+++ b/llvm/test/MC/LoongArch/lasx/min.s
@@ -0,0 +1,68 @@
+# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \
+# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \
+# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST
+
+xvmin.b $xr21, $xr26, $xr7
+# CHECK-INST: xvmin.b $xr21, $xr26, $xr7
+# CHECK-ENCODING: encoding: [0x55,0x1f,0x72,0x74]
+
+xvmin.h $xr29, $xr5, $xr9
+# CHECK-INST: xvmin.h $xr29, $xr5, $xr9
+# CHECK-ENCODING: encoding: [0xbd,0xa4,0x72,0x74]
+
+xvmin.w $xr31, $xr24, $xr20
+# CHECK-INST: xvmin.w $xr31, $xr24, $xr20
+# CHECK-ENCODING: encoding: [0x1f,0x53,0x73,0x74]
+
+xvmin.d $xr27, $xr27, $xr2
+# CHECK-INST: xvmin.d $xr27, $xr27, $xr2
+# CHECK-ENCODING: encoding: [0x7b,0x8b,0x73,0x74]
+
+xvmini.b $xr22, $xr17, 9
+# CHECK-INST: xvmini.b $xr22, $xr17, 9
+# CHECK-ENCODING: encoding: [0x36,0x26,0x92,0x76]
+
+xvmini.h $xr12, $xr23, -15
+# CHECK-INST: xvmini.h $xr12, $xr23, -15
+# CHECK-ENCODING: encoding: [0xec,0xc6,0x92,0x76]
+
+xvmini.w $xr1, $xr17, -13
+# CHECK-INST: xvmini.w $xr1, $xr17, -13
+# CHECK-ENCODING: encoding: [0x21,0x4e,0x93,0x76]
+
+xvmini.d $xr10, $xr31, 11
+# CHECK-INST: xvmini.d $xr10, $xr31, 11
+# CHECK-ENCODING: encoding: [0xea,0xaf,0x93,0x76]
+
+xvmin.bu $xr15, $xr16, $xr3
+# CHECK-INST: xvmin.bu $xr15, $xr16, $xr3
+# CHECK-ENCODING: encoding: [0x0f,0x0e,0x76,0x74]
+
+xvmin.hu $xr4, $xr31, $xr27
+# CHECK-INST: xvmin.hu $xr4, $xr31, $xr27
+# CHECK-ENCODING: encoding: [0xe4,0xef,0x76,0x74]
+
+xvmin.wu $xr15, $xr13, $xr28
+# CHECK-INST: xvmin.wu $xr15, $xr13, $xr28
+# CHECK-ENCODING: encoding: [0xaf,0x71,0x77,0x74]
+
+xvmin.du $xr27, $xr3, $xr5
+# CHECK-INST: xvmin.du $xr27, $xr3, $xr5
+# CHECK-ENCODING: encoding: [0x7b,0x94,0x77,0x74]
+
+xvmini.bu $xr6, $xr24, 7
+# CHECK-INST: xvmini.bu $xr6, $xr24, 7
+# CHECK-ENCODING: encoding: [0x06,0x1f,0x96,0x76]
+
+xvmini.hu $xr8, $xr5, 29
+# CHECK-INST: xvmini.hu $xr8, $xr5, 29
+# CHECK-ENCODING: encoding: [0xa8,0xf4,0x96,0x76]
+
+xvmini.wu $xr17, $xr13, 19
+# CHECK-INST: xvmini.wu $xr17, $xr13, 19
+# CHECK-ENCODING: encoding: [0xb1,0x4d,0x97,0x76]
+
+xvmini.du $xr16, $xr23, 30
+# CHECK-INST: xvmini.du $xr16, $xr23, 30
+# CHECK-ENCODING: encoding: [0xf0,0xfa,0x97,0x76]
diff --git a/llvm/test/MC/LoongArch/lasx/mod.s b/llvm/test/MC/LoongArch/lasx/mod.s
new file mode 100644
index 0000000000000000000000000000000000000000..bdb458a8d572e821a88462fe4b83f6c581c8967f
--- /dev/null
+++ b/llvm/test/MC/LoongArch/lasx/mod.s
@@ -0,0 +1,36 @@
+# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \
+# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \
+# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST
+
+xvmod.b $xr8, $xr3, $xr0
+# CHECK-INST: xvmod.b $xr8, $xr3, $xr0
+# CHECK-ENCODING: encoding: [0x68,0x00,0xe2,0x74]
+
+xvmod.h $xr2, $xr17, $xr28
+# CHECK-INST: xvmod.h $xr2, $xr17, $xr28
+# CHECK-ENCODING: encoding: [0x22,0xf2,0xe2,0x74]
+
+xvmod.w $xr14, $xr8, $xr13
+# CHECK-INST: xvmod.w $xr14, $xr8, $xr13
+# CHECK-ENCODING: encoding: [0x0e,0x35,0xe3,0x74]
+
+xvmod.d $xr11, $xr10, $xr18
+# CHECK-INST: xvmod.d $xr11, $xr10, $xr18
+# CHECK-ENCODING: encoding: [0x4b,0xc9,0xe3,0x74]
+
+xvmod.bu $xr16, $xr1, $xr26
+# CHECK-INST: xvmod.bu $xr16, $xr1, $xr26
+# CHECK-ENCODING: encoding: [0x30,0x68,0xe6,0x74]
+
+xvmod.hu $xr15, $xr13, $xr0
+# CHECK-INST: xvmod.hu $xr15, $xr13, $xr0
+# CHECK-ENCODING: encoding: [0xaf,0x81,0xe6,0x74]
+
+xvmod.wu $xr11, $xr19, $xr20
+# CHECK-INST: xvmod.wu $xr11, $xr19, $xr20
+# CHECK-ENCODING: encoding: [0x6b,0x52,0xe7,0x74]
+
+xvmod.du $xr14, $xr3, $xr6
+# CHECK-INST: xvmod.du $xr14, $xr3, $xr6
+# CHECK-ENCODING: encoding: [0x6e,0x98,0xe7,0x74]
diff --git a/llvm/test/MC/LoongArch/lasx/mskgez.s b/llvm/test/MC/LoongArch/lasx/mskgez.s
new file mode 100644
index 0000000000000000000000000000000000000000..347b2fed570fa573fb7d18f04c00f7b89fb4c5e2
--- /dev/null
+++ b/llvm/test/MC/LoongArch/lasx/mskgez.s
@@ -0,0 +1,8 @@
+# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \
+# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \
+# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST
+
+xvmskgez.b $xr30, $xr5
+# CHECK-INST: xvmskgez.b $xr30, $xr5
+# CHECK-ENCODING: encoding: [0xbe,0x50,0x9c,0x76]
diff --git a/llvm/test/MC/LoongArch/lasx/mskltz.s b/llvm/test/MC/LoongArch/lasx/mskltz.s
new file mode 100644
index 0000000000000000000000000000000000000000..52dd411d0c6014b6cd7387fe4f61a10adca3c43e
--- /dev/null
+++ b/llvm/test/MC/LoongArch/lasx/mskltz.s
@@ -0,0 +1,20 @@
+# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \
+# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \
+# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST
+
+xvmskltz.b $xr14, $xr5
+# CHECK-INST: xvmskltz.b $xr14, $xr5
+# CHECK-ENCODING: encoding: [0xae,0x40,0x9c,0x76]
+
+xvmskltz.h $xr11, $xr25
+# CHECK-INST: xvmskltz.h $xr11, $xr25
+# CHECK-ENCODING: encoding: [0x2b,0x47,0x9c,0x76]
+
+xvmskltz.w $xr14, $xr27
+# CHECK-INST: xvmskltz.w $xr14, $xr27
+# CHECK-ENCODING: encoding: [0x6e,0x4b,0x9c,0x76]
+
+xvmskltz.d $xr7, $xr23
+# CHECK-INST: xvmskltz.d $xr7, $xr23
+# CHECK-ENCODING: encoding: [0xe7,0x4e,0x9c,0x76]
diff --git a/llvm/test/MC/LoongArch/lasx/msknz.s b/llvm/test/MC/LoongArch/lasx/msknz.s
new file mode 100644
index 0000000000000000000000000000000000000000..288c7e616526c14078ac3444fe1cfa66d25df726
--- /dev/null
+++ b/llvm/test/MC/LoongArch/lasx/msknz.s
@@ -0,0 +1,8 @@
+# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \
+# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \
+# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST
+
+xvmsknz.b $xr22, $xr22
+# CHECK-INST: xvmsknz.b $xr22, $xr22
+# CHECK-ENCODING: encoding: [0xd6,0x62,0x9c,0x76]
diff --git a/llvm/test/MC/LoongArch/lasx/msub.s b/llvm/test/MC/LoongArch/lasx/msub.s
new file mode 100644
index 0000000000000000000000000000000000000000..72da08a745db880af880673ea3f00b38e5e16873
--- /dev/null
+++ b/llvm/test/MC/LoongArch/lasx/msub.s
@@ -0,0 +1,20 @@
+# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \
+# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \
+# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST
+
+xvmsub.b $xr22, $xr20, $xr7
+# CHECK-INST: xvmsub.b $xr22, $xr20, $xr7
+# CHECK-ENCODING: encoding: [0x96,0x1e,0xaa,0x74]
+
+xvmsub.h $xr0, $xr18, $xr12
+# CHECK-INST: xvmsub.h $xr0, $xr18, $xr12
+# CHECK-ENCODING: encoding: [0x40,0xb2,0xaa,0x74]
+
+xvmsub.w $xr3, $xr22, $xr29
+# CHECK-INST: xvmsub.w $xr3, $xr22, $xr29
+# CHECK-ENCODING: encoding: [0xc3,0x76,0xab,0x74]
+
+xvmsub.d $xr11, $xr26, $xr2
+# CHECK-INST: xvmsub.d $xr11, $xr26, $xr2
+# CHECK-ENCODING: encoding: [0x4b,0x8b,0xab,0x74]
diff --git a/llvm/test/MC/LoongArch/lasx/muh.s b/llvm/test/MC/LoongArch/lasx/muh.s
new file mode 100644
index 0000000000000000000000000000000000000000..226a97a05f2b6da67285c2320ec61be10571501b
--- /dev/null
+++ b/llvm/test/MC/LoongArch/lasx/muh.s
@@ -0,0 +1,36 @@
+# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \
+# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \
+# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST
+
+xvmuh.b $xr4, $xr8, $xr4
+# CHECK-INST: xvmuh.b $xr4, $xr8, $xr4
+# CHECK-ENCODING: encoding: [0x04,0x11,0x86,0x74]
+
+xvmuh.h $xr5, $xr23, $xr26
+# CHECK-INST: xvmuh.h $xr5, $xr23, $xr26
+# CHECK-ENCODING: encoding: [0xe5,0xea,0x86,0x74]
+
+xvmuh.w $xr28, $xr3, $xr25
+# CHECK-INST: xvmuh.w $xr28, $xr3, $xr25
+# CHECK-ENCODING: encoding: [0x7c,0x64,0x87,0x74]
+
+xvmuh.d $xr6, $xr0, $xr9
+# CHECK-INST: xvmuh.d $xr6, $xr0, $xr9
+# CHECK-ENCODING: encoding: [0x06,0xa4,0x87,0x74]
+
+xvmuh.bu $xr15, $xr20, $xr24
+# CHECK-INST: xvmuh.bu $xr15, $xr20, $xr24
+# CHECK-ENCODING: encoding: [0x8f,0x62,0x88,0x74]
+
+xvmuh.hu $xr28, $xr12, $xr27
+# CHECK-INST: xvmuh.hu $xr28, $xr12, $xr27
+# CHECK-ENCODING: encoding: [0x9c,0xed,0x88,0x74]
+
+xvmuh.wu $xr25, $xr6, $xr10
+# CHECK-INST: xvmuh.wu $xr25, $xr6, $xr10
+# CHECK-ENCODING: encoding: [0xd9,0x28,0x89,0x74]
+
+xvmuh.du $xr19, $xr8, $xr31
+# CHECK-INST: xvmuh.du $xr19, $xr8, $xr31
+# CHECK-ENCODING: encoding: [0x13,0xfd,0x89,0x74]
diff --git a/llvm/test/MC/LoongArch/lasx/mul.s b/llvm/test/MC/LoongArch/lasx/mul.s
new file mode 100644
index 0000000000000000000000000000000000000000..8d24b6549b45165863d9156ad67e1d7356491daa
--- /dev/null
+++ b/llvm/test/MC/LoongArch/lasx/mul.s
@@ -0,0 +1,20 @@
+# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \
+# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \
+# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST
+
+xvmul.b $xr18, $xr7, $xr27
+# CHECK-INST: xvmul.b $xr18, $xr7, $xr27
+# CHECK-ENCODING: encoding: [0xf2,0x6c,0x84,0x74]
+
+xvmul.h $xr9, $xr23, $xr18
+# CHECK-INST: xvmul.h $xr9, $xr23, $xr18
+# CHECK-ENCODING: encoding: [0xe9,0xca,0x84,0x74]
+
+xvmul.w $xr21, $xr8, $xr27
+# CHECK-INST: xvmul.w $xr21, $xr8, $xr27
+# CHECK-ENCODING: encoding: [0x15,0x6d,0x85,0x74]
+
+xvmul.d $xr0, $xr15, $xr8
+# CHECK-INST: xvmul.d $xr0, $xr15, $xr8
+# CHECK-ENCODING: encoding: [0xe0,0xa1,0x85,0x74]
diff --git a/llvm/test/MC/LoongArch/lasx/mulw.s b/llvm/test/MC/LoongArch/lasx/mulw.s
new file mode 100644
index 0000000000000000000000000000000000000000..42aa23ba91d8f64def9a52a925cb1271836e3e8c
--- /dev/null
+++ b/llvm/test/MC/LoongArch/lasx/mulw.s
@@ -0,0 +1,100 @@
+# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \
+# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \
+# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST
+
+xvmulwev.h.b $xr2, $xr7, $xr16
+# CHECK-INST: xvmulwev.h.b $xr2, $xr7, $xr16
+# CHECK-ENCODING: encoding: [0xe2,0x40,0x90,0x74]
+
+xvmulwev.w.h $xr12, $xr11, $xr6
+# CHECK-INST: xvmulwev.w.h $xr12, $xr11, $xr6
+# CHECK-ENCODING: encoding: [0x6c,0x99,0x90,0x74]
+
+xvmulwev.d.w $xr16, $xr24, $xr15
+# CHECK-INST: xvmulwev.d.w $xr16, $xr24, $xr15
+# CHECK-ENCODING: encoding: [0x10,0x3f,0x91,0x74]
+
+xvmulwev.q.d $xr17, $xr16, $xr4
+# CHECK-INST: xvmulwev.q.d $xr17, $xr16, $xr4
+# CHECK-ENCODING: encoding: [0x11,0x92,0x91,0x74]
+
+xvmulwev.h.bu $xr20, $xr7, $xr29
+# CHECK-INST: xvmulwev.h.bu $xr20, $xr7, $xr29
+# CHECK-ENCODING: encoding: [0xf4,0x74,0x98,0x74]
+
+xvmulwev.w.hu $xr13, $xr24, $xr17
+# CHECK-INST: xvmulwev.w.hu $xr13, $xr24, $xr17
+# CHECK-ENCODING: encoding: [0x0d,0xc7,0x98,0x74]
+
+xvmulwev.d.wu $xr1, $xr24, $xr30
+# CHECK-INST: xvmulwev.d.wu $xr1, $xr24, $xr30
+# CHECK-ENCODING: encoding: [0x01,0x7b,0x99,0x74]
+
+xvmulwev.q.du $xr1, $xr22, $xr27
+# CHECK-INST: xvmulwev.q.du $xr1, $xr22, $xr27
+# CHECK-ENCODING: encoding: [0xc1,0xee,0x99,0x74]
+
+xvmulwev.h.bu.b $xr13, $xr28, $xr12
+# CHECK-INST: xvmulwev.h.bu.b $xr13, $xr28, $xr12
+# CHECK-ENCODING: encoding: [0x8d,0x33,0xa0,0x74]
+
+xvmulwev.w.hu.h $xr27, $xr16, $xr7
+# CHECK-INST: xvmulwev.w.hu.h $xr27, $xr16, $xr7
+# CHECK-ENCODING: encoding: [0x1b,0x9e,0xa0,0x74]
+
+xvmulwev.d.wu.w $xr13, $xr7, $xr17
+# CHECK-INST: xvmulwev.d.wu.w $xr13, $xr7, $xr17
+# CHECK-ENCODING: encoding: [0xed,0x44,0xa1,0x74]
+
+xvmulwev.q.du.d $xr9, $xr20, $xr15
+# CHECK-INST: xvmulwev.q.du.d $xr9, $xr20, $xr15
+# CHECK-ENCODING: encoding: [0x89,0xbe,0xa1,0x74]
+
+xvmulwod.h.b $xr16, $xr18, $xr2
+# CHECK-INST: xvmulwod.h.b $xr16, $xr18, $xr2
+# CHECK-ENCODING: encoding: [0x50,0x0a,0x92,0x74]
+
+xvmulwod.w.h $xr30, $xr2, $xr23
+# CHECK-INST: xvmulwod.w.h $xr30, $xr2, $xr23
+# CHECK-ENCODING: encoding: [0x5e,0xdc,0x92,0x74]
+
+xvmulwod.d.w $xr30, $xr27, $xr8
+# CHECK-INST: xvmulwod.d.w $xr30, $xr27, $xr8
+# CHECK-ENCODING: encoding: [0x7e,0x23,0x93,0x74]
+
+xvmulwod.q.d $xr20, $xr21, $xr15
+# CHECK-INST: xvmulwod.q.d $xr20, $xr21, $xr15
+# CHECK-ENCODING: encoding: [0xb4,0xbe,0x93,0x74]
+
+xvmulwod.h.bu $xr19, $xr26, $xr7
+# CHECK-INST: xvmulwod.h.bu $xr19, $xr26, $xr7
+# CHECK-ENCODING: encoding: [0x53,0x1f,0x9a,0x74]
+
+xvmulwod.w.hu $xr14, $xr17, $xr6
+# CHECK-INST: xvmulwod.w.hu $xr14, $xr17, $xr6
+# CHECK-ENCODING: encoding: [0x2e,0x9a,0x9a,0x74]
+
+xvmulwod.d.wu $xr24, $xr22, $xr20
+# CHECK-INST: xvmulwod.d.wu $xr24, $xr22, $xr20
+# CHECK-ENCODING: encoding: [0xd8,0x52,0x9b,0x74]
+
+xvmulwod.q.du $xr28, $xr31, $xr7
+# CHECK-INST: xvmulwod.q.du $xr28, $xr31, $xr7
+# CHECK-ENCODING: encoding: [0xfc,0x9f,0x9b,0x74]
+
+xvmulwod.h.bu.b $xr24, $xr15, $xr28
+# CHECK-INST: xvmulwod.h.bu.b $xr24, $xr15, $xr28
+# CHECK-ENCODING: encoding: [0xf8,0x71,0xa2,0x74]
+
+xvmulwod.w.hu.h $xr24, $xr8, $xr1
+# CHECK-INST: xvmulwod.w.hu.h $xr24, $xr8, $xr1
+# CHECK-ENCODING: encoding: [0x18,0x85,0xa2,0x74]
+
+xvmulwod.d.wu.w $xr10, $xr3, $xr1
+# CHECK-INST: xvmulwod.d.wu.w $xr10, $xr3, $xr1
+# CHECK-ENCODING: encoding: [0x6a,0x04,0xa3,0x74]
+
+xvmulwod.q.du.d $xr15, $xr15, $xr2
+# CHECK-INST: xvmulwod.q.du.d $xr15, $xr15, $xr2
+# CHECK-ENCODING: encoding: [0xef,0x89,0xa3,0x74]
diff --git a/llvm/test/MC/LoongArch/lasx/neg.s b/llvm/test/MC/LoongArch/lasx/neg.s
new file mode 100644
index 0000000000000000000000000000000000000000..7db7de62e0ebdc361fd229dd6648dfb87fa6e067
--- /dev/null
+++ b/llvm/test/MC/LoongArch/lasx/neg.s
@@ -0,0 +1,20 @@
+# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \
+# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \
+# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST
+
+xvneg.b $xr23, $xr4
+# CHECK-INST: xvneg.b $xr23, $xr4
+# CHECK-ENCODING: encoding: [0x97,0x30,0x9c,0x76]
+
+xvneg.h $xr8, $xr14
+# CHECK-INST: xvneg.h $xr8, $xr14
+# CHECK-ENCODING: encoding: [0xc8,0x35,0x9c,0x76]
+
+xvneg.w $xr23, $xr14
+# CHECK-INST: xvneg.w $xr23, $xr14
+# CHECK-ENCODING: encoding: [0xd7,0x39,0x9c,0x76]
+
+xvneg.d $xr20, $xr17
+# CHECK-INST: xvneg.d $xr20, $xr17
+# CHECK-ENCODING: encoding: [0x34,0x3e,0x9c,0x76]
diff --git a/llvm/test/MC/LoongArch/lasx/nor.s b/llvm/test/MC/LoongArch/lasx/nor.s
new file mode 100644
index 0000000000000000000000000000000000000000..95ef446bf4f06d92f876c16c719866d0ff1ebee8
--- /dev/null
+++ b/llvm/test/MC/LoongArch/lasx/nor.s
@@ -0,0 +1,8 @@
+# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \
+# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \
+# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST
+
+xvnor.v $xr4, $xr23, $xr3
+# CHECK-INST: xvnor.v $xr4, $xr23, $xr3
+# CHECK-ENCODING: encoding: [0xe4,0x8e,0x27,0x75]
diff --git a/llvm/test/MC/LoongArch/lasx/nori.s b/llvm/test/MC/LoongArch/lasx/nori.s
new file mode 100644
index 0000000000000000000000000000000000000000..42ea27d4379b2a7f4f663c2f3c24a75b3d4dd2e3
--- /dev/null
+++ b/llvm/test/MC/LoongArch/lasx/nori.s
@@ -0,0 +1,8 @@
+# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \
+# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s |
\ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvnori.b $xr7, $xr1, 209 +# CHECK-INST: xvnori.b $xr7, $xr1, 209 +# CHECK-ENCODING: encoding: [0x27,0x44,0xdf,0x77] diff --git a/llvm/test/MC/LoongArch/lasx/or.s b/llvm/test/MC/LoongArch/lasx/or.s new file mode 100644 index 0000000000000000000000000000000000000000..dd4c17600cb0e4b6f571f30efff64a7069d3bbe9 --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/or.s @@ -0,0 +1,8 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvor.v $xr6, $xr29, $xr21 +# CHECK-INST: xvor.v $xr6, $xr29, $xr21 +# CHECK-ENCODING: encoding: [0xa6,0xd7,0x26,0x75] diff --git a/llvm/test/MC/LoongArch/lasx/ori.s b/llvm/test/MC/LoongArch/lasx/ori.s new file mode 100644 index 0000000000000000000000000000000000000000..2b1e56a5c3ac7c8500450e6db75e86241885e005 --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/ori.s @@ -0,0 +1,8 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvori.b $xr6, $xr2, 239 +# CHECK-INST: xvori.b $xr6, $xr2, 239 +# CHECK-ENCODING: encoding: [0x46,0xbc,0xd7,0x77] diff --git a/llvm/test/MC/LoongArch/lasx/orn.s b/llvm/test/MC/LoongArch/lasx/orn.s new file mode 100644 index 0000000000000000000000000000000000000000..a318de2574b894200ab50494378418cb879b6a82 --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/orn.s @@ -0,0 +1,8 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvorn.v $xr17, $xr29, $xr5 +# CHECK-INST: xvorn.v $xr17, $xr29, $xr5 +# CHECK-ENCODING: encoding: [0xb1,0x97,0x28,0x75] diff --git a/llvm/test/MC/LoongArch/lasx/pack.s b/llvm/test/MC/LoongArch/lasx/pack.s new file mode 100644 index 0000000000000000000000000000000000000000..bb71be4b0fcac537694f86eaf9b06390c8c0911b --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/pack.s @@ -0,0 +1,36 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvpackev.b $xr21, $xr2, $xr8 +# CHECK-INST: xvpackev.b $xr21, $xr2, $xr8 +# CHECK-ENCODING: encoding: [0x55,0x20,0x16,0x75] + +xvpackev.h $xr8, $xr18, $xr6 +# CHECK-INST: xvpackev.h $xr8, $xr18, $xr6 +# CHECK-ENCODING: encoding: [0x48,0x9a,0x16,0x75] + +xvpackev.w $xr0, $xr6, $xr30 +# CHECK-INST: xvpackev.w $xr0, $xr6, $xr30 +# CHECK-ENCODING: encoding: [0xc0,0x78,0x17,0x75] + +xvpackev.d $xr0, $xr9, $xr4 +# CHECK-INST: xvpackev.d $xr0, $xr9, $xr4 +# CHECK-ENCODING: encoding: [0x20,0x91,0x17,0x75] + +xvpackod.b $xr28, $xr29, $xr31 +# CHECK-INST: xvpackod.b $xr28, $xr29, $xr31 +# CHECK-ENCODING: encoding: [0xbc,0x7f,0x18,0x75] + +xvpackod.h $xr14, $xr10, $xr6 +# CHECK-INST: xvpackod.h $xr14, $xr10, $xr6 +# CHECK-ENCODING: encoding: [0x4e,0x99,0x18,0x75] + +xvpackod.w $xr22, $xr21, $xr2 +# CHECK-INST: xvpackod.w $xr22, $xr21, $xr2 +# CHECK-ENCODING: encoding: [0xb6,0x0a,0x19,0x75] + +xvpackod.d $xr18, $xr9, $xr2 +# CHECK-INST: xvpackod.d 
$xr18, $xr9, $xr2 +# CHECK-ENCODING: encoding: [0x32,0x89,0x19,0x75] diff --git a/llvm/test/MC/LoongArch/lasx/pcnt.s b/llvm/test/MC/LoongArch/lasx/pcnt.s new file mode 100644 index 0000000000000000000000000000000000000000..9f1786bec0eee13c0b2525dbfbc56f539cef311f --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/pcnt.s @@ -0,0 +1,20 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvpcnt.b $xr8, $xr27 +# CHECK-INST: xvpcnt.b $xr8, $xr27 +# CHECK-ENCODING: encoding: [0x68,0x23,0x9c,0x76] + +xvpcnt.h $xr12, $xr4 +# CHECK-INST: xvpcnt.h $xr12, $xr4 +# CHECK-ENCODING: encoding: [0x8c,0x24,0x9c,0x76] + +xvpcnt.w $xr31, $xr23 +# CHECK-INST: xvpcnt.w $xr31, $xr23 +# CHECK-ENCODING: encoding: [0xff,0x2a,0x9c,0x76] + +xvpcnt.d $xr26, $xr12 +# CHECK-INST: xvpcnt.d $xr26, $xr12 +# CHECK-ENCODING: encoding: [0x9a,0x2d,0x9c,0x76] diff --git a/llvm/test/MC/LoongArch/lasx/perm.s b/llvm/test/MC/LoongArch/lasx/perm.s new file mode 100644 index 0000000000000000000000000000000000000000..85bef644c7ae79a6a5fd8ddb68302cbdb4630c92 --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/perm.s @@ -0,0 +1,8 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvperm.w $xr24, $xr23, $xr16 +# CHECK-INST: xvperm.w $xr24, $xr23, $xr16 +# CHECK-ENCODING: encoding: [0xf8,0x42,0x7d,0x75] diff --git a/llvm/test/MC/LoongArch/lasx/permi.s b/llvm/test/MC/LoongArch/lasx/permi.s new file mode 100644 index 0000000000000000000000000000000000000000..4d925a86b20655d5ce1c66118469651edb850f54 --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/permi.s @@ -0,0 +1,16 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvpermi.w $xr7, $xr12, 101 +# CHECK-INST: xvpermi.w $xr7, $xr12, 101 +# CHECK-ENCODING: encoding: [0x87,0x95,0xe5,0x77] + +xvpermi.d $xr17, $xr6, 131 +# CHECK-INST: xvpermi.d $xr17, $xr6, 131 +# CHECK-ENCODING: encoding: [0xd1,0x0c,0xea,0x77] + +xvpermi.q $xr10, $xr15, 184 +# CHECK-INST: xvpermi.q $xr10, $xr15, 184 +# CHECK-ENCODING: encoding: [0xea,0xe1,0xee,0x77] diff --git a/llvm/test/MC/LoongArch/lasx/pick.s b/llvm/test/MC/LoongArch/lasx/pick.s new file mode 100644 index 0000000000000000000000000000000000000000..7510d2088ebe60c70ac0955046397899342f0106 --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/pick.s @@ -0,0 +1,36 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvpickev.b $xr22, $xr27, $xr6 +# CHECK-INST: xvpickev.b $xr22, $xr27, $xr6 +# CHECK-ENCODING: encoding: [0x76,0x1b,0x1e,0x75] + +xvpickev.h $xr14, $xr11, $xr3 +# CHECK-INST: xvpickev.h $xr14, $xr11, $xr3 +# CHECK-ENCODING: encoding: [0x6e,0x8d,0x1e,0x75] + +xvpickev.w $xr30, $xr28, $xr13 +# CHECK-INST: xvpickev.w $xr30, $xr28, $xr13 +# CHECK-ENCODING: encoding: [0x9e,0x37,0x1f,0x75] + +xvpickev.d $xr1, $xr24, $xr9 +# CHECK-INST: xvpickev.d $xr1, $xr24, $xr9 +# 
CHECK-ENCODING: encoding: [0x01,0xa7,0x1f,0x75] + +xvpickod.b $xr14, $xr22, $xr15 +# CHECK-INST: xvpickod.b $xr14, $xr22, $xr15 +# CHECK-ENCODING: encoding: [0xce,0x3e,0x20,0x75] + +xvpickod.h $xr31, $xr21, $xr12 +# CHECK-INST: xvpickod.h $xr31, $xr21, $xr12 +# CHECK-ENCODING: encoding: [0xbf,0xb2,0x20,0x75] + +xvpickod.w $xr31, $xr0, $xr30 +# CHECK-INST: xvpickod.w $xr31, $xr0, $xr30 +# CHECK-ENCODING: encoding: [0x1f,0x78,0x21,0x75] + +xvpickod.d $xr10, $xr5, $xr16 +# CHECK-INST: xvpickod.d $xr10, $xr5, $xr16 +# CHECK-ENCODING: encoding: [0xaa,0xc0,0x21,0x75] diff --git a/llvm/test/MC/LoongArch/lasx/pickve.s b/llvm/test/MC/LoongArch/lasx/pickve.s new file mode 100644 index 0000000000000000000000000000000000000000..6f8c40bfa69945d1a175872f1819723afc386ec1 --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/pickve.s @@ -0,0 +1,12 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvpickve.w $xr25, $xr28, 1 +# CHECK-INST: xvpickve.w $xr25, $xr28, 1 +# CHECK-ENCODING: encoding: [0x99,0xc7,0x03,0x77] + +xvpickve.d $xr13, $xr1, 0 +# CHECK-INST: xvpickve.d $xr13, $xr1, 0 +# CHECK-ENCODING: encoding: [0x2d,0xe0,0x03,0x77] diff --git a/llvm/test/MC/LoongArch/lasx/pickve2gr.s b/llvm/test/MC/LoongArch/lasx/pickve2gr.s new file mode 100644 index 0000000000000000000000000000000000000000..d378d4ff6a1ef98e29c81697d5be8d2a73c1785d --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/pickve2gr.s @@ -0,0 +1,20 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvpickve2gr.w $r14, $xr11, 6 +# CHECK-INST: xvpickve2gr.w $t2, $xr11, 6 +# CHECK-ENCODING: encoding: [0x6e,0xd9,0xef,0x76] + +xvpickve2gr.d $r8, $xr6, 0 +# CHECK-INST: xvpickve2gr.d $a4, $xr6, 0 +# CHECK-ENCODING: encoding: [0xc8,0xe0,0xef,0x76] + +xvpickve2gr.wu $r12, $xr1, 4 +# CHECK-INST: xvpickve2gr.wu $t0, $xr1, 4 +# CHECK-ENCODING: encoding: [0x2c,0xd0,0xf3,0x76] + +xvpickve2gr.du $r10, $xr8, 0 +# CHECK-INST: xvpickve2gr.du $a6, $xr8, 0 +# CHECK-ENCODING: encoding: [0x0a,0xe1,0xf3,0x76] diff --git a/llvm/test/MC/LoongArch/lasx/repl128vei.s b/llvm/test/MC/LoongArch/lasx/repl128vei.s new file mode 100644 index 0000000000000000000000000000000000000000..44f9b9596546346b1bf3692e31a2e152b3950771 --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/repl128vei.s @@ -0,0 +1,20 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvrepl128vei.b $xr10, $xr19, 2 +# CHECK-INST: xvrepl128vei.b $xr10, $xr19, 2 +# CHECK-ENCODING: encoding: [0x6a,0x8a,0xf7,0x76] + +xvrepl128vei.h $xr6, $xr19, 2 +# CHECK-INST: xvrepl128vei.h $xr6, $xr19, 2 +# CHECK-ENCODING: encoding: [0x66,0xca,0xf7,0x76] + +xvrepl128vei.w $xr11, $xr13, 1 +# CHECK-INST: xvrepl128vei.w $xr11, $xr13, 1 +# CHECK-ENCODING: encoding: [0xab,0xe5,0xf7,0x76] + +xvrepl128vei.d $xr31, $xr23, 0 +# CHECK-INST: xvrepl128vei.d $xr31, $xr23, 0 +# CHECK-ENCODING: encoding: [0xff,0xf2,0xf7,0x76] diff --git a/llvm/test/MC/LoongArch/lasx/replgr2vr.s b/llvm/test/MC/LoongArch/lasx/replgr2vr.s new file mode 100644 index 
0000000000000000000000000000000000000000..d1584c3c017f0d8fc485954822e9fc41cfbbe08c --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/replgr2vr.s @@ -0,0 +1,20 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvreplgr2vr.b $xr16, $r16 +# CHECK-INST: xvreplgr2vr.b $xr16, $t4 +# CHECK-ENCODING: encoding: [0x10,0x02,0x9f,0x76] + +xvreplgr2vr.h $xr7, $r22 +# CHECK-INST: xvreplgr2vr.h $xr7, $fp +# CHECK-ENCODING: encoding: [0xc7,0x06,0x9f,0x76] + +xvreplgr2vr.w $xr4, $r15 +# CHECK-INST: xvreplgr2vr.w $xr4, $t3 +# CHECK-ENCODING: encoding: [0xe4,0x09,0x9f,0x76] + +xvreplgr2vr.d $xr16, $r24 +# CHECK-INST: xvreplgr2vr.d $xr16, $s1 +# CHECK-ENCODING: encoding: [0x10,0x0f,0x9f,0x76] diff --git a/llvm/test/MC/LoongArch/lasx/replve.s b/llvm/test/MC/LoongArch/lasx/replve.s new file mode 100644 index 0000000000000000000000000000000000000000..c0319a426b5b5f33305e1ce24a9bebb4fb6549e0 --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/replve.s @@ -0,0 +1,20 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvreplve.b $xr20, $xr16, $r11 +# CHECK-INST: xvreplve.b $xr20, $xr16, $a7 +# CHECK-ENCODING: encoding: [0x14,0x2e,0x22,0x75] + +xvreplve.h $xr0, $xr21, $r24 +# CHECK-INST: xvreplve.h $xr0, $xr21, $s1 +# CHECK-ENCODING: encoding: [0xa0,0xe2,0x22,0x75] + +xvreplve.w $xr20, $xr18, $r18 +# CHECK-INST: xvreplve.w $xr20, $xr18, $t6 +# CHECK-ENCODING: encoding: [0x54,0x4a,0x23,0x75] + +xvreplve.d $xr4, $xr3, $r23 +# CHECK-INST: xvreplve.d $xr4, $xr3, $s0 +# CHECK-ENCODING: encoding: [0x64,0xdc,0x23,0x75] diff --git a/llvm/test/MC/LoongArch/lasx/replve0.s b/llvm/test/MC/LoongArch/lasx/replve0.s new file mode 100644 index 0000000000000000000000000000000000000000..f3289a5c464ff5ca5a1daa4af14cd9da3402aadc --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/replve0.s @@ -0,0 +1,24 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvreplve0.b $xr11, $xr20 +# CHECK-INST: xvreplve0.b $xr11, $xr20 +# CHECK-ENCODING: encoding: [0x8b,0x02,0x07,0x77] + +xvreplve0.h $xr13, $xr26 +# CHECK-INST: xvreplve0.h $xr13, $xr26 +# CHECK-ENCODING: encoding: [0x4d,0x83,0x07,0x77] + +xvreplve0.w $xr8, $xr12 +# CHECK-INST: xvreplve0.w $xr8, $xr12 +# CHECK-ENCODING: encoding: [0x88,0xc1,0x07,0x77] + +xvreplve0.d $xr20, $xr4 +# CHECK-INST: xvreplve0.d $xr20, $xr4 +# CHECK-ENCODING: encoding: [0x94,0xe0,0x07,0x77] + +xvreplve0.q $xr17, $xr20 +# CHECK-INST: xvreplve0.q $xr17, $xr20 +# CHECK-ENCODING: encoding: [0x91,0xf2,0x07,0x77] diff --git a/llvm/test/MC/LoongArch/lasx/rotr.s b/llvm/test/MC/LoongArch/lasx/rotr.s new file mode 100644 index 0000000000000000000000000000000000000000..c6dec2da73e6654da4e44e4f444b0493a238e383 --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/rotr.s @@ -0,0 +1,36 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + 
+xvrotr.b $xr0, $xr6, $xr30 +# CHECK-INST: xvrotr.b $xr0, $xr6, $xr30 +# CHECK-ENCODING: encoding: [0xc0,0x78,0xee,0x74] + +xvrotr.h $xr19, $xr17, $xr10 +# CHECK-INST: xvrotr.h $xr19, $xr17, $xr10 +# CHECK-ENCODING: encoding: [0x33,0xaa,0xee,0x74] + +xvrotr.w $xr18, $xr2, $xr7 +# CHECK-INST: xvrotr.w $xr18, $xr2, $xr7 +# CHECK-ENCODING: encoding: [0x52,0x1c,0xef,0x74] + +xvrotr.d $xr11, $xr23, $xr11 +# CHECK-INST: xvrotr.d $xr11, $xr23, $xr11 +# CHECK-ENCODING: encoding: [0xeb,0xae,0xef,0x74] + +xvrotri.b $xr1, $xr5, 3 +# CHECK-INST: xvrotri.b $xr1, $xr5, 3 +# CHECK-ENCODING: encoding: [0xa1,0x2c,0xa0,0x76] + +xvrotri.h $xr1, $xr17, 3 +# CHECK-INST: xvrotri.h $xr1, $xr17, 3 +# CHECK-ENCODING: encoding: [0x21,0x4e,0xa0,0x76] + +xvrotri.w $xr25, $xr23, 19 +# CHECK-INST: xvrotri.w $xr25, $xr23, 19 +# CHECK-ENCODING: encoding: [0xf9,0xce,0xa0,0x76] + +xvrotri.d $xr7, $xr24, 37 +# CHECK-INST: xvrotri.d $xr7, $xr24, 37 +# CHECK-ENCODING: encoding: [0x07,0x97,0xa1,0x76] diff --git a/llvm/test/MC/LoongArch/lasx/sadd.s b/llvm/test/MC/LoongArch/lasx/sadd.s new file mode 100644 index 0000000000000000000000000000000000000000..abc84aaf8db75eb1137ffffeffb0b37053364f32 --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/sadd.s @@ -0,0 +1,36 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvsadd.b $xr27, $xr30, $xr22 +# CHECK-INST: xvsadd.b $xr27, $xr30, $xr22 +# CHECK-ENCODING: encoding: [0xdb,0x5b,0x46,0x74] + +xvsadd.h $xr29, $xr0, $xr1 +# CHECK-INST: xvsadd.h $xr29, $xr0, $xr1 +# CHECK-ENCODING: encoding: [0x1d,0x84,0x46,0x74] + +xvsadd.w $xr22, $xr28, $xr31 +# CHECK-INST: xvsadd.w $xr22, $xr28, $xr31 +# CHECK-ENCODING: encoding: [0x96,0x7f,0x47,0x74] + +xvsadd.d $xr5, $xr18, $xr26 +# CHECK-INST: xvsadd.d $xr5, $xr18, $xr26 +# CHECK-ENCODING: encoding: [0x45,0xea,0x47,0x74] + +xvsadd.bu $xr29, $xr20, $xr28 +# CHECK-INST: xvsadd.bu $xr29, $xr20, $xr28 +# CHECK-ENCODING: encoding: [0x9d,0x72,0x4a,0x74] + +xvsadd.hu $xr7, $xr16, $xr6 +# CHECK-INST: xvsadd.hu $xr7, $xr16, $xr6 +# CHECK-ENCODING: encoding: [0x07,0x9a,0x4a,0x74] + +xvsadd.wu $xr2, $xr10, $xr15 +# CHECK-INST: xvsadd.wu $xr2, $xr10, $xr15 +# CHECK-ENCODING: encoding: [0x42,0x3d,0x4b,0x74] + +xvsadd.du $xr18, $xr24, $xr14 +# CHECK-INST: xvsadd.du $xr18, $xr24, $xr14 +# CHECK-ENCODING: encoding: [0x12,0xbb,0x4b,0x74] diff --git a/llvm/test/MC/LoongArch/lasx/sat.s b/llvm/test/MC/LoongArch/lasx/sat.s new file mode 100644 index 0000000000000000000000000000000000000000..19b51ad1cbd37e757b48187f6befb7f87d821294 --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/sat.s @@ -0,0 +1,36 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvsat.b $xr22, $xr7, 2 +# CHECK-INST: xvsat.b $xr22, $xr7, 2 +# CHECK-ENCODING: encoding: [0xf6,0x28,0x24,0x77] + +xvsat.h $xr3, $xr0, 5 +# CHECK-INST: xvsat.h $xr3, $xr0, 5 +# CHECK-ENCODING: encoding: [0x03,0x54,0x24,0x77] + +xvsat.w $xr9, $xr16, 0 +# CHECK-INST: xvsat.w $xr9, $xr16, 0 +# CHECK-ENCODING: encoding: [0x09,0x82,0x24,0x77] + +xvsat.d $xr3, $xr8, 1 +# CHECK-INST: xvsat.d $xr3, $xr8, 1 +# CHECK-ENCODING: encoding: [0x03,0x05,0x25,0x77] + +xvsat.bu $xr6, $xr6, 4 +# CHECK-INST: xvsat.bu $xr6, $xr6, 4 +# 
CHECK-ENCODING: encoding: [0xc6,0x30,0x28,0x77] + +xvsat.hu $xr12, $xr25, 12 +# CHECK-INST: xvsat.hu $xr12, $xr25, 12 +# CHECK-ENCODING: encoding: [0x2c,0x73,0x28,0x77] + +xvsat.wu $xr20, $xr1, 3 +# CHECK-INST: xvsat.wu $xr20, $xr1, 3 +# CHECK-ENCODING: encoding: [0x34,0x8c,0x28,0x77] + +xvsat.du $xr5, $xr20, 7 +# CHECK-INST: xvsat.du $xr5, $xr20, 7 +# CHECK-ENCODING: encoding: [0x85,0x1e,0x29,0x77] diff --git a/llvm/test/MC/LoongArch/lasx/seq.s b/llvm/test/MC/LoongArch/lasx/seq.s new file mode 100644 index 0000000000000000000000000000000000000000..ca18422067d5b0cb979ebc1a6bceba0a1df9f853 --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/seq.s @@ -0,0 +1,36 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvseq.b $xr3, $xr4, $xr19 +# CHECK-INST: xvseq.b $xr3, $xr4, $xr19 +# CHECK-ENCODING: encoding: [0x83,0x4c,0x00,0x74] + +xvseq.h $xr0, $xr21, $xr5 +# CHECK-INST: xvseq.h $xr0, $xr21, $xr5 +# CHECK-ENCODING: encoding: [0xa0,0x96,0x00,0x74] + +xvseq.w $xr6, $xr16, $xr19 +# CHECK-INST: xvseq.w $xr6, $xr16, $xr19 +# CHECK-ENCODING: encoding: [0x06,0x4e,0x01,0x74] + +xvseq.d $xr8, $xr13, $xr13 +# CHECK-INST: xvseq.d $xr8, $xr13, $xr13 +# CHECK-ENCODING: encoding: [0xa8,0xb5,0x01,0x74] + +xvseqi.b $xr12, $xr25, 0 +# CHECK-INST: xvseqi.b $xr12, $xr25, 0 +# CHECK-ENCODING: encoding: [0x2c,0x03,0x80,0x76] + +xvseqi.h $xr9, $xr4, 10 +# CHECK-INST: xvseqi.h $xr9, $xr4, 10 +# CHECK-ENCODING: encoding: [0x89,0xa8,0x80,0x76] + +xvseqi.w $xr25, $xr4, -12 +# CHECK-INST: xvseqi.w $xr25, $xr4, -12 +# CHECK-ENCODING: encoding: [0x99,0x50,0x81,0x76] + +xvseqi.d $xr11, $xr7, 7 +# CHECK-INST: xvseqi.d $xr11, $xr7, 7 +# CHECK-ENCODING: encoding: [0xeb,0x9c,0x81,0x76] diff --git a/llvm/test/MC/LoongArch/lasx/set.s b/llvm/test/MC/LoongArch/lasx/set.s new file mode 100644 index 0000000000000000000000000000000000000000..ad49e4d91bc7ddc49806b0c028a900d3f514d6ac --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/set.s @@ -0,0 +1,12 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvseteqz.v $fcc7, $xr1 +# CHECK-INST: xvseteqz.v $fcc7, $xr1 +# CHECK-ENCODING: encoding: [0x27,0x98,0x9c,0x76] + +xvsetnez.v $fcc7, $xr13 +# CHECK-INST: xvsetnez.v $fcc7, $xr13 +# CHECK-ENCODING: encoding: [0xa7,0x9d,0x9c,0x76] diff --git a/llvm/test/MC/LoongArch/lasx/setallnez.s b/llvm/test/MC/LoongArch/lasx/setallnez.s new file mode 100644 index 0000000000000000000000000000000000000000..d58bad5f49e83e1e94b423a99890d2bddefb8180 --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/setallnez.s @@ -0,0 +1,20 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvsetallnez.b $fcc5, $xr29 +# CHECK-INST: xvsetallnez.b $fcc5, $xr29 +# CHECK-ENCODING: encoding: [0xa5,0xb3,0x9c,0x76] + +xvsetallnez.h $fcc5, $xr4 +# CHECK-INST: xvsetallnez.h $fcc5, $xr4 +# CHECK-ENCODING: encoding: [0x85,0xb4,0x9c,0x76] + +xvsetallnez.w $fcc4, $xr5 +# CHECK-INST: xvsetallnez.w $fcc4, $xr5 +# CHECK-ENCODING: encoding: [0xa4,0xb8,0x9c,0x76] + +xvsetallnez.d $fcc7, $xr20 +# 
CHECK-INST: xvsetallnez.d $fcc7, $xr20 +# CHECK-ENCODING: encoding: [0x87,0xbe,0x9c,0x76] diff --git a/llvm/test/MC/LoongArch/lasx/setanyeqz.s b/llvm/test/MC/LoongArch/lasx/setanyeqz.s new file mode 100644 index 0000000000000000000000000000000000000000..d879dd0f24b9f9ee8d8d9ddf0519662c8582332d --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/setanyeqz.s @@ -0,0 +1,20 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvsetanyeqz.b $fcc5, $xr8 +# CHECK-INST: xvsetanyeqz.b $fcc5, $xr8 +# CHECK-ENCODING: encoding: [0x05,0xa1,0x9c,0x76] + +xvsetanyeqz.h $fcc5, $xr20 +# CHECK-INST: xvsetanyeqz.h $fcc5, $xr20 +# CHECK-ENCODING: encoding: [0x85,0xa6,0x9c,0x76] + +xvsetanyeqz.w $fcc7, $xr6 +# CHECK-INST: xvsetanyeqz.w $fcc7, $xr6 +# CHECK-ENCODING: encoding: [0xc7,0xa8,0x9c,0x76] + +xvsetanyeqz.d $fcc6, $xr17 +# CHECK-INST: xvsetanyeqz.d $fcc6, $xr17 +# CHECK-ENCODING: encoding: [0x26,0xae,0x9c,0x76] diff --git a/llvm/test/MC/LoongArch/lasx/shuf.s b/llvm/test/MC/LoongArch/lasx/shuf.s new file mode 100644 index 0000000000000000000000000000000000000000..d08039d5429cbc2433752c0a8d17de135c53a486 --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/shuf.s @@ -0,0 +1,20 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvshuf.b $xr20, $xr6, $xr11, $xr15 +# CHECK-INST: xvshuf.b $xr20, $xr6, $xr11, $xr15 +# CHECK-ENCODING: encoding: [0xd4,0xac,0x67,0x0d] + +xvshuf.h $xr29, $xr24, $xr1 +# CHECK-INST: xvshuf.h $xr29, $xr24, $xr1 +# CHECK-ENCODING: encoding: [0x1d,0x87,0x7a,0x75] + +xvshuf.w $xr15, $xr24, $xr29 +# CHECK-INST: xvshuf.w $xr15, $xr24, $xr29 +# CHECK-ENCODING: encoding: [0x0f,0x77,0x7b,0x75] + +xvshuf.d $xr27, $xr18, $xr15 +# CHECK-INST: xvshuf.d $xr27, $xr18, $xr15 +# CHECK-ENCODING: encoding: [0x5b,0xbe,0x7b,0x75] diff --git a/llvm/test/MC/LoongArch/lasx/shuf4i.s b/llvm/test/MC/LoongArch/lasx/shuf4i.s new file mode 100644 index 0000000000000000000000000000000000000000..73610e529aad445f33952ba7cf56bd9d09b836d2 --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/shuf4i.s @@ -0,0 +1,20 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvshuf4i.b $xr21, $xr28, 168 +# CHECK-INST: xvshuf4i.b $xr21, $xr28, 168 +# CHECK-ENCODING: encoding: [0x95,0xa3,0x92,0x77] + +xvshuf4i.h $xr18, $xr3, 22 +# CHECK-INST: xvshuf4i.h $xr18, $xr3, 22 +# CHECK-ENCODING: encoding: [0x72,0x58,0x94,0x77] + +xvshuf4i.w $xr0, $xr25, 82 +# CHECK-INST: xvshuf4i.w $xr0, $xr25, 82 +# CHECK-ENCODING: encoding: [0x20,0x4b,0x99,0x77] + +xvshuf4i.d $xr24, $xr4, 99 +# CHECK-INST: xvshuf4i.d $xr24, $xr4, 99 +# CHECK-ENCODING: encoding: [0x98,0x8c,0x9d,0x77] diff --git a/llvm/test/MC/LoongArch/lasx/signcov.s b/llvm/test/MC/LoongArch/lasx/signcov.s new file mode 100644 index 0000000000000000000000000000000000000000..9656f7d781c864fa2fe018dab74783268bf23c40 --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/signcov.s @@ -0,0 +1,20 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s 
--check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvsigncov.b $xr1, $xr24, $xr13 +# CHECK-INST: xvsigncov.b $xr1, $xr24, $xr13 +# CHECK-ENCODING: encoding: [0x01,0x37,0x2e,0x75] + +xvsigncov.h $xr8, $xr23, $xr14 +# CHECK-INST: xvsigncov.h $xr8, $xr23, $xr14 +# CHECK-ENCODING: encoding: [0xe8,0xba,0x2e,0x75] + +xvsigncov.w $xr3, $xr25, $xr10 +# CHECK-INST: xvsigncov.w $xr3, $xr25, $xr10 +# CHECK-ENCODING: encoding: [0x23,0x2b,0x2f,0x75] + +xvsigncov.d $xr26, $xr17, $xr31 +# CHECK-INST: xvsigncov.d $xr26, $xr17, $xr31 +# CHECK-ENCODING: encoding: [0x3a,0xfe,0x2f,0x75] diff --git a/llvm/test/MC/LoongArch/lasx/sle.s b/llvm/test/MC/LoongArch/lasx/sle.s new file mode 100644 index 0000000000000000000000000000000000000000..062eca5d1ec89089028e43a7ef9e7dd8b2107529 --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/sle.s @@ -0,0 +1,68 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvsle.b $xr24, $xr30, $xr29 +# CHECK-INST: xvsle.b $xr24, $xr30, $xr29 +# CHECK-ENCODING: encoding: [0xd8,0x77,0x02,0x74] + +xvsle.h $xr23, $xr13, $xr20 +# CHECK-INST: xvsle.h $xr23, $xr13, $xr20 +# CHECK-ENCODING: encoding: [0xb7,0xd1,0x02,0x74] + +xvsle.w $xr10, $xr31, $xr24 +# CHECK-INST: xvsle.w $xr10, $xr31, $xr24 +# CHECK-ENCODING: encoding: [0xea,0x63,0x03,0x74] + +xvsle.d $xr13, $xr26, $xr8 +# CHECK-INST: xvsle.d $xr13, $xr26, $xr8 +# CHECK-ENCODING: encoding: [0x4d,0xa3,0x03,0x74] + +xvslei.b $xr14, $xr11, -10 +# CHECK-INST: xvslei.b $xr14, $xr11, -10 +# CHECK-ENCODING: encoding: [0x6e,0x59,0x82,0x76] + +xvslei.h $xr2, $xr22, 15 +# CHECK-INST: xvslei.h $xr2, $xr22, 15 +# CHECK-ENCODING: encoding: [0xc2,0xbe,0x82,0x76] + +xvslei.w $xr3, $xr14, 12 +# CHECK-INST: xvslei.w $xr3, $xr14, 12 +# CHECK-ENCODING: encoding: [0xc3,0x31,0x83,0x76] + +xvslei.d $xr19, $xr30, 10 +# CHECK-INST: xvslei.d $xr19, $xr30, 10 +# CHECK-ENCODING: encoding: [0xd3,0xab,0x83,0x76] + +xvsle.bu $xr9, $xr27, $xr2 +# CHECK-INST: xvsle.bu $xr9, $xr27, $xr2 +# CHECK-ENCODING: encoding: [0x69,0x0b,0x04,0x74] + +xvsle.hu $xr29, $xr25, $xr22 +# CHECK-INST: xvsle.hu $xr29, $xr25, $xr22 +# CHECK-ENCODING: encoding: [0x3d,0xdb,0x04,0x74] + +xvsle.wu $xr16, $xr25, $xr14 +# CHECK-INST: xvsle.wu $xr16, $xr25, $xr14 +# CHECK-ENCODING: encoding: [0x30,0x3b,0x05,0x74] + +xvsle.du $xr5, $xr6, $xr18 +# CHECK-INST: xvsle.du $xr5, $xr6, $xr18 +# CHECK-ENCODING: encoding: [0xc5,0xc8,0x05,0x74] + +xvslei.bu $xr17, $xr26, 10 +# CHECK-INST: xvslei.bu $xr17, $xr26, 10 +# CHECK-ENCODING: encoding: [0x51,0x2b,0x84,0x76] + +xvslei.hu $xr20, $xr11, 18 +# CHECK-INST: xvslei.hu $xr20, $xr11, 18 +# CHECK-ENCODING: encoding: [0x74,0xc9,0x84,0x76] + +xvslei.wu $xr1, $xr29, 10 +# CHECK-INST: xvslei.wu $xr1, $xr29, 10 +# CHECK-ENCODING: encoding: [0xa1,0x2b,0x85,0x76] + +xvslei.du $xr25, $xr31, 24 +# CHECK-INST: xvslei.du $xr25, $xr31, 24 +# CHECK-ENCODING: encoding: [0xf9,0xe3,0x85,0x76] diff --git a/llvm/test/MC/LoongArch/lasx/sll.s b/llvm/test/MC/LoongArch/lasx/sll.s new file mode 100644 index 0000000000000000000000000000000000000000..ef9d3e5c47e385b599081c8d964c0d7caff09bc5 --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/sll.s @@ -0,0 +1,36 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s 
--check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvsll.b $xr8, $xr29, $xr9 +# CHECK-INST: xvsll.b $xr8, $xr29, $xr9 +# CHECK-ENCODING: encoding: [0xa8,0x27,0xe8,0x74] + +xvsll.h $xr21, $xr28, $xr29 +# CHECK-INST: xvsll.h $xr21, $xr28, $xr29 +# CHECK-ENCODING: encoding: [0x95,0xf7,0xe8,0x74] + +xvsll.w $xr17, $xr30, $xr10 +# CHECK-INST: xvsll.w $xr17, $xr30, $xr10 +# CHECK-ENCODING: encoding: [0xd1,0x2b,0xe9,0x74] + +xvsll.d $xr19, $xr6, $xr26 +# CHECK-INST: xvsll.d $xr19, $xr6, $xr26 +# CHECK-ENCODING: encoding: [0xd3,0xe8,0xe9,0x74] + +xvslli.b $xr25, $xr26, 1 +# CHECK-INST: xvslli.b $xr25, $xr26, 1 +# CHECK-ENCODING: encoding: [0x59,0x27,0x2c,0x77] + +xvslli.h $xr17, $xr28, 14 +# CHECK-INST: xvslli.h $xr17, $xr28, 14 +# CHECK-ENCODING: encoding: [0x91,0x7b,0x2c,0x77] + +xvslli.w $xr26, $xr31, 29 +# CHECK-INST: xvslli.w $xr26, $xr31, 29 +# CHECK-ENCODING: encoding: [0xfa,0xf7,0x2c,0x77] + +xvslli.d $xr10, $xr28, 46 +# CHECK-INST: xvslli.d $xr10, $xr28, 46 +# CHECK-ENCODING: encoding: [0x8a,0xbb,0x2d,0x77] diff --git a/llvm/test/MC/LoongArch/lasx/sllwil.s b/llvm/test/MC/LoongArch/lasx/sllwil.s new file mode 100644 index 0000000000000000000000000000000000000000..0e89ccf02b510ca94cc45f9a2e6ebd2dce2fea9b --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/sllwil.s @@ -0,0 +1,28 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvsllwil.h.b $xr13, $xr21, 6 +# CHECK-INST: xvsllwil.h.b $xr13, $xr21, 6 +# CHECK-ENCODING: encoding: [0xad,0x3a,0x08,0x77] + +xvsllwil.w.h $xr20, $xr29, 0 +# CHECK-INST: xvsllwil.w.h $xr20, $xr29, 0 +# CHECK-ENCODING: encoding: [0xb4,0x43,0x08,0x77] + +xvsllwil.d.w $xr3, $xr20, 24 +# CHECK-INST: xvsllwil.d.w $xr3, $xr20, 24 +# CHECK-ENCODING: encoding: [0x83,0xe2,0x08,0x77] + +xvsllwil.hu.bu $xr15, $xr15, 6 +# CHECK-INST: xvsllwil.hu.bu $xr15, $xr15, 6 +# CHECK-ENCODING: encoding: [0xef,0x39,0x0c,0x77] + +xvsllwil.wu.hu $xr22, $xr29, 0 +# CHECK-INST: xvsllwil.wu.hu $xr22, $xr29, 0 +# CHECK-ENCODING: encoding: [0xb6,0x43,0x0c,0x77] + +xvsllwil.du.wu $xr3, $xr5, 31 +# CHECK-INST: xvsllwil.du.wu $xr3, $xr5, 31 +# CHECK-ENCODING: encoding: [0xa3,0xfc,0x0c,0x77] diff --git a/llvm/test/MC/LoongArch/lasx/slt.s b/llvm/test/MC/LoongArch/lasx/slt.s new file mode 100644 index 0000000000000000000000000000000000000000..40308a59fc57eae4df2f4e2da9a5ef2696742e87 --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/slt.s @@ -0,0 +1,68 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvslt.b $xr30, $xr31, $xr13 +# CHECK-INST: xvslt.b $xr30, $xr31, $xr13 +# CHECK-ENCODING: encoding: [0xfe,0x37,0x06,0x74] + +xvslt.h $xr19, $xr23, $xr0 +# CHECK-INST: xvslt.h $xr19, $xr23, $xr0 +# CHECK-ENCODING: encoding: [0xf3,0x82,0x06,0x74] + +xvslt.w $xr23, $xr26, $xr3 +# CHECK-INST: xvslt.w $xr23, $xr26, $xr3 +# CHECK-ENCODING: encoding: [0x57,0x0f,0x07,0x74] + +xvslt.d $xr3, $xr10, $xr31 +# CHECK-INST: xvslt.d $xr3, $xr10, $xr31 +# CHECK-ENCODING: encoding: [0x43,0xfd,0x07,0x74] + +xvslti.b $xr31, $xr27, 6 +# CHECK-INST: xvslti.b $xr31, $xr27, 6 +# CHECK-ENCODING: encoding: 
[0x7f,0x1b,0x86,0x76] + +xvslti.h $xr5, $xr19, 6 +# CHECK-INST: xvslti.h $xr5, $xr19, 6 +# CHECK-ENCODING: encoding: [0x65,0x9a,0x86,0x76] + +xvslti.w $xr20, $xr8, 11 +# CHECK-INST: xvslti.w $xr20, $xr8, 11 +# CHECK-ENCODING: encoding: [0x14,0x2d,0x87,0x76] + +xvslti.d $xr13, $xr18, 2 +# CHECK-INST: xvslti.d $xr13, $xr18, 2 +# CHECK-ENCODING: encoding: [0x4d,0x8a,0x87,0x76] + +xvslt.bu $xr20, $xr13, $xr29 +# CHECK-INST: xvslt.bu $xr20, $xr13, $xr29 +# CHECK-ENCODING: encoding: [0xb4,0x75,0x08,0x74] + +xvslt.hu $xr12, $xr29, $xr26 +# CHECK-INST: xvslt.hu $xr12, $xr29, $xr26 +# CHECK-ENCODING: encoding: [0xac,0xeb,0x08,0x74] + +xvslt.wu $xr26, $xr25, $xr31 +# CHECK-INST: xvslt.wu $xr26, $xr25, $xr31 +# CHECK-ENCODING: encoding: [0x3a,0x7f,0x09,0x74] + +xvslt.du $xr30, $xr20, $xr3 +# CHECK-INST: xvslt.du $xr30, $xr20, $xr3 +# CHECK-ENCODING: encoding: [0x9e,0x8e,0x09,0x74] + +xvslti.bu $xr1, $xr4, 2 +# CHECK-INST: xvslti.bu $xr1, $xr4, 2 +# CHECK-ENCODING: encoding: [0x81,0x08,0x88,0x76] + +xvslti.hu $xr0, $xr5, 20 +# CHECK-INST: xvslti.hu $xr0, $xr5, 20 +# CHECK-ENCODING: encoding: [0xa0,0xd0,0x88,0x76] + +xvslti.wu $xr0, $xr25, 24 +# CHECK-INST: xvslti.wu $xr0, $xr25, 24 +# CHECK-ENCODING: encoding: [0x20,0x63,0x89,0x76] + +xvslti.du $xr10, $xr5, 29 +# CHECK-INST: xvslti.du $xr10, $xr5, 29 +# CHECK-ENCODING: encoding: [0xaa,0xf4,0x89,0x76] diff --git a/llvm/test/MC/LoongArch/lasx/sra.s b/llvm/test/MC/LoongArch/lasx/sra.s new file mode 100644 index 0000000000000000000000000000000000000000..f9bc842f1e1513928d0f4a675e0a53e66cbe778b --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/sra.s @@ -0,0 +1,36 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvsra.b $xr11, $xr2, $xr0 +# CHECK-INST: xvsra.b $xr11, $xr2, $xr0 +# CHECK-ENCODING: encoding: [0x4b,0x00,0xec,0x74] + +xvsra.h $xr17, $xr27, $xr6 +# CHECK-INST: xvsra.h $xr17, $xr27, $xr6 +# CHECK-ENCODING: encoding: [0x71,0x9b,0xec,0x74] + +xvsra.w $xr13, $xr12, $xr12 +# CHECK-INST: xvsra.w $xr13, $xr12, $xr12 +# CHECK-ENCODING: encoding: [0x8d,0x31,0xed,0x74] + +xvsra.d $xr6, $xr15, $xr1 +# CHECK-INST: xvsra.d $xr6, $xr15, $xr1 +# CHECK-ENCODING: encoding: [0xe6,0x85,0xed,0x74] + +xvsrai.b $xr16, $xr2, 3 +# CHECK-INST: xvsrai.b $xr16, $xr2, 3 +# CHECK-ENCODING: encoding: [0x50,0x2c,0x34,0x77] + +xvsrai.h $xr14, $xr3, 12 +# CHECK-INST: xvsrai.h $xr14, $xr3, 12 +# CHECK-ENCODING: encoding: [0x6e,0x70,0x34,0x77] + +xvsrai.w $xr17, $xr18, 21 +# CHECK-INST: xvsrai.w $xr17, $xr18, 21 +# CHECK-ENCODING: encoding: [0x51,0xd6,0x34,0x77] + +xvsrai.d $xr10, $xr20, 4 +# CHECK-INST: xvsrai.d $xr10, $xr20, 4 +# CHECK-ENCODING: encoding: [0x8a,0x12,0x35,0x77] diff --git a/llvm/test/MC/LoongArch/lasx/sran.s b/llvm/test/MC/LoongArch/lasx/sran.s new file mode 100644 index 0000000000000000000000000000000000000000..3e0613c124e8cd9485c2644c36c3371f6de057b4 --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/sran.s @@ -0,0 +1,16 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvsran.b.h $xr30, $xr13, $xr3 +# CHECK-INST: xvsran.b.h $xr30, $xr13, $xr3 +# CHECK-ENCODING: encoding: [0xbe,0x8d,0xf6,0x74] + +xvsran.h.w $xr18, $xr26, $xr4 +# CHECK-INST: xvsran.h.w $xr18, 
$xr26, $xr4 +# CHECK-ENCODING: encoding: [0x52,0x13,0xf7,0x74] + +xvsran.w.d $xr27, $xr19, $xr21 +# CHECK-INST: xvsran.w.d $xr27, $xr19, $xr21 +# CHECK-ENCODING: encoding: [0x7b,0xd6,0xf7,0x74] diff --git a/llvm/test/MC/LoongArch/lasx/srani.s b/llvm/test/MC/LoongArch/lasx/srani.s new file mode 100644 index 0000000000000000000000000000000000000000..e8d9713671cda27cb95639accd13dc1d01f48b65 --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/srani.s @@ -0,0 +1,20 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvsrani.b.h $xr14, $xr23, 15 +# CHECK-INST: xvsrani.b.h $xr14, $xr23, 15 +# CHECK-ENCODING: encoding: [0xee,0x7e,0x58,0x77] + +xvsrani.h.w $xr2, $xr8, 5 +# CHECK-INST: xvsrani.h.w $xr2, $xr8, 5 +# CHECK-ENCODING: encoding: [0x02,0x95,0x58,0x77] + +xvsrani.w.d $xr5, $xr11, 14 +# CHECK-INST: xvsrani.w.d $xr5, $xr11, 14 +# CHECK-ENCODING: encoding: [0x65,0x39,0x59,0x77] + +xvsrani.d.q $xr17, $xr7, 113 +# CHECK-INST: xvsrani.d.q $xr17, $xr7, 113 +# CHECK-ENCODING: encoding: [0xf1,0xc4,0x5b,0x77] diff --git a/llvm/test/MC/LoongArch/lasx/srar.s b/llvm/test/MC/LoongArch/lasx/srar.s new file mode 100644 index 0000000000000000000000000000000000000000..c247c78ecb8cd0a444486f6463e95aeb07e6b541 --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/srar.s @@ -0,0 +1,36 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvsrar.b $xr9, $xr18, $xr11 +# CHECK-INST: xvsrar.b $xr9, $xr18, $xr11 +# CHECK-ENCODING: encoding: [0x49,0x2e,0xf2,0x74] + +xvsrar.h $xr15, $xr26, $xr1 +# CHECK-INST: xvsrar.h $xr15, $xr26, $xr1 +# CHECK-ENCODING: encoding: [0x4f,0x87,0xf2,0x74] + +xvsrar.w $xr17, $xr19, $xr14 +# CHECK-INST: xvsrar.w $xr17, $xr19, $xr14 +# CHECK-ENCODING: encoding: [0x71,0x3a,0xf3,0x74] + +xvsrar.d $xr19, $xr15, $xr6 +# CHECK-INST: xvsrar.d $xr19, $xr15, $xr6 +# CHECK-ENCODING: encoding: [0xf3,0x99,0xf3,0x74] + +xvsrari.b $xr10, $xr28, 3 +# CHECK-INST: xvsrari.b $xr10, $xr28, 3 +# CHECK-ENCODING: encoding: [0x8a,0x2f,0xa8,0x76] + +xvsrari.h $xr28, $xr1, 14 +# CHECK-INST: xvsrari.h $xr28, $xr1, 14 +# CHECK-ENCODING: encoding: [0x3c,0x78,0xa8,0x76] + +xvsrari.w $xr13, $xr7, 12 +# CHECK-INST: xvsrari.w $xr13, $xr7, 12 +# CHECK-ENCODING: encoding: [0xed,0xb0,0xa8,0x76] + +xvsrari.d $xr29, $xr9, 8 +# CHECK-INST: xvsrari.d $xr29, $xr9, 8 +# CHECK-ENCODING: encoding: [0x3d,0x21,0xa9,0x76] diff --git a/llvm/test/MC/LoongArch/lasx/srarn.s b/llvm/test/MC/LoongArch/lasx/srarn.s new file mode 100644 index 0000000000000000000000000000000000000000..e963f2e589fa1bc80c42761d4b66b3ff33d1f0b7 --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/srarn.s @@ -0,0 +1,16 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvsrarn.b.h $xr18, $xr20, $xr15 +# CHECK-INST: xvsrarn.b.h $xr18, $xr20, $xr15 +# CHECK-ENCODING: encoding: [0x92,0xbe,0xfa,0x74] + +xvsrarn.h.w $xr12, $xr1, $xr4 +# CHECK-INST: xvsrarn.h.w $xr12, $xr1, $xr4 +# CHECK-ENCODING: encoding: [0x2c,0x10,0xfb,0x74] + +xvsrarn.w.d $xr9, $xr18, $xr26 +# CHECK-INST: 
xvsrarn.w.d $xr9, $xr18, $xr26 +# CHECK-ENCODING: encoding: [0x49,0xea,0xfb,0x74] diff --git a/llvm/test/MC/LoongArch/lasx/srarni.s b/llvm/test/MC/LoongArch/lasx/srarni.s new file mode 100644 index 0000000000000000000000000000000000000000..eda38ef997e1dbe3e066cebc5faac03a0912d2f5 --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/srarni.s @@ -0,0 +1,20 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvsrarni.b.h $xr21, $xr31, 15 +# CHECK-INST: xvsrarni.b.h $xr21, $xr31, 15 +# CHECK-ENCODING: encoding: [0xf5,0x7f,0x5c,0x77] + +xvsrarni.h.w $xr4, $xr22, 25 +# CHECK-INST: xvsrarni.h.w $xr4, $xr22, 25 +# CHECK-ENCODING: encoding: [0xc4,0xe6,0x5c,0x77] + +xvsrarni.w.d $xr24, $xr8, 41 +# CHECK-INST: xvsrarni.w.d $xr24, $xr8, 41 +# CHECK-ENCODING: encoding: [0x18,0xa5,0x5d,0x77] + +xvsrarni.d.q $xr7, $xr5, 7 +# CHECK-INST: xvsrarni.d.q $xr7, $xr5, 7 +# CHECK-ENCODING: encoding: [0xa7,0x1c,0x5e,0x77] diff --git a/llvm/test/MC/LoongArch/lasx/srl.s b/llvm/test/MC/LoongArch/lasx/srl.s new file mode 100644 index 0000000000000000000000000000000000000000..0d0607d0f3d2cdefb4a417a40c9806fc93e3151a --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/srl.s @@ -0,0 +1,36 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvsrl.b $xr20, $xr24, $xr29 +# CHECK-INST: xvsrl.b $xr20, $xr24, $xr29 +# CHECK-ENCODING: encoding: [0x14,0x77,0xea,0x74] + +xvsrl.h $xr11, $xr17, $xr31 +# CHECK-INST: xvsrl.h $xr11, $xr17, $xr31 +# CHECK-ENCODING: encoding: [0x2b,0xfe,0xea,0x74] + +xvsrl.w $xr2, $xr10, $xr8 +# CHECK-INST: xvsrl.w $xr2, $xr10, $xr8 +# CHECK-ENCODING: encoding: [0x42,0x21,0xeb,0x74] + +xvsrl.d $xr13, $xr30, $xr26 +# CHECK-INST: xvsrl.d $xr13, $xr30, $xr26 +# CHECK-ENCODING: encoding: [0xcd,0xeb,0xeb,0x74] + +xvsrli.b $xr29, $xr4, 3 +# CHECK-INST: xvsrli.b $xr29, $xr4, 3 +# CHECK-ENCODING: encoding: [0x9d,0x2c,0x30,0x77] + +xvsrli.h $xr28, $xr14, 12 +# CHECK-INST: xvsrli.h $xr28, $xr14, 12 +# CHECK-ENCODING: encoding: [0xdc,0x71,0x30,0x77] + +xvsrli.w $xr12, $xr18, 7 +# CHECK-INST: xvsrli.w $xr12, $xr18, 7 +# CHECK-ENCODING: encoding: [0x4c,0x9e,0x30,0x77] + +xvsrli.d $xr0, $xr4, 46 +# CHECK-INST: xvsrli.d $xr0, $xr4, 46 +# CHECK-ENCODING: encoding: [0x80,0xb8,0x31,0x77] diff --git a/llvm/test/MC/LoongArch/lasx/srln.s b/llvm/test/MC/LoongArch/lasx/srln.s new file mode 100644 index 0000000000000000000000000000000000000000..af6249919362415ca7f7f2cb750fb213c20e7939 --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/srln.s @@ -0,0 +1,16 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvsrln.b.h $xr7, $xr13, $xr5 +# CHECK-INST: xvsrln.b.h $xr7, $xr13, $xr5 +# CHECK-ENCODING: encoding: [0xa7,0x95,0xf4,0x74] + +xvsrln.h.w $xr6, $xr18, $xr5 +# CHECK-INST: xvsrln.h.w $xr6, $xr18, $xr5 +# CHECK-ENCODING: encoding: [0x46,0x16,0xf5,0x74] + +xvsrln.w.d $xr12, $xr12, $xr28 +# CHECK-INST: xvsrln.w.d $xr12, $xr12, $xr28 +# CHECK-ENCODING: encoding: [0x8c,0xf1,0xf5,0x74] diff --git a/llvm/test/MC/LoongArch/lasx/srlni.s 
b/llvm/test/MC/LoongArch/lasx/srlni.s new file mode 100644 index 0000000000000000000000000000000000000000..917c9a752c1fbe4388db7527fe89eebbc3e3d70e --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/srlni.s @@ -0,0 +1,20 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvsrlni.b.h $xr5, $xr8, 2 +# CHECK-INST: xvsrlni.b.h $xr5, $xr8, 2 +# CHECK-ENCODING: encoding: [0x05,0x49,0x40,0x77] + +xvsrlni.h.w $xr7, $xr4, 20 +# CHECK-INST: xvsrlni.h.w $xr7, $xr4, 20 +# CHECK-ENCODING: encoding: [0x87,0xd0,0x40,0x77] + +xvsrlni.w.d $xr30, $xr15, 17 +# CHECK-INST: xvsrlni.w.d $xr30, $xr15, 17 +# CHECK-ENCODING: encoding: [0xfe,0x45,0x41,0x77] + +xvsrlni.d.q $xr15, $xr28, 95 +# CHECK-INST: xvsrlni.d.q $xr15, $xr28, 95 +# CHECK-ENCODING: encoding: [0x8f,0x7f,0x43,0x77] diff --git a/llvm/test/MC/LoongArch/lasx/srlr.s b/llvm/test/MC/LoongArch/lasx/srlr.s new file mode 100644 index 0000000000000000000000000000000000000000..d87cc2291fb4d38099bee4b5ea51d6bb5afd9dfb --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/srlr.s @@ -0,0 +1,36 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvsrlr.b $xr18, $xr11, $xr5 +# CHECK-INST: xvsrlr.b $xr18, $xr11, $xr5 +# CHECK-ENCODING: encoding: [0x72,0x15,0xf0,0x74] + +xvsrlr.h $xr31, $xr5, $xr21 +# CHECK-INST: xvsrlr.h $xr31, $xr5, $xr21 +# CHECK-ENCODING: encoding: [0xbf,0xd4,0xf0,0x74] + +xvsrlr.w $xr7, $xr5, $xr1 +# CHECK-INST: xvsrlr.w $xr7, $xr5, $xr1 +# CHECK-ENCODING: encoding: [0xa7,0x04,0xf1,0x74] + +xvsrlr.d $xr4, $xr27, $xr7 +# CHECK-INST: xvsrlr.d $xr4, $xr27, $xr7 +# CHECK-ENCODING: encoding: [0x64,0x9f,0xf1,0x74] + +xvsrlri.b $xr29, $xr30, 4 +# CHECK-INST: xvsrlri.b $xr29, $xr30, 4 +# CHECK-ENCODING: encoding: [0xdd,0x33,0xa4,0x76] + +xvsrlri.h $xr16, $xr6, 14 +# CHECK-INST: xvsrlri.h $xr16, $xr6, 14 +# CHECK-ENCODING: encoding: [0xd0,0x78,0xa4,0x76] + +xvsrlri.w $xr24, $xr10, 28 +# CHECK-INST: xvsrlri.w $xr24, $xr10, 28 +# CHECK-ENCODING: encoding: [0x58,0xf1,0xa4,0x76] + +xvsrlri.d $xr20, $xr20, 52 +# CHECK-INST: xvsrlri.d $xr20, $xr20, 52 +# CHECK-ENCODING: encoding: [0x94,0xd2,0xa5,0x76] diff --git a/llvm/test/MC/LoongArch/lasx/srlrn.s b/llvm/test/MC/LoongArch/lasx/srlrn.s new file mode 100644 index 0000000000000000000000000000000000000000..3a70d97bfc1182572b4da036b020a117787e2008 --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/srlrn.s @@ -0,0 +1,16 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvsrlrn.b.h $xr4, $xr25, $xr26 +# CHECK-INST: xvsrlrn.b.h $xr4, $xr25, $xr26 +# CHECK-ENCODING: encoding: [0x24,0xeb,0xf8,0x74] + +xvsrlrn.h.w $xr17, $xr5, $xr1 +# CHECK-INST: xvsrlrn.h.w $xr17, $xr5, $xr1 +# CHECK-ENCODING: encoding: [0xb1,0x04,0xf9,0x74] + +xvsrlrn.w.d $xr29, $xr1, $xr17 +# CHECK-INST: xvsrlrn.w.d $xr29, $xr1, $xr17 +# CHECK-ENCODING: encoding: [0x3d,0xc4,0xf9,0x74] diff --git a/llvm/test/MC/LoongArch/lasx/srlrni.s b/llvm/test/MC/LoongArch/lasx/srlrni.s new file mode 100644 index 
0000000000000000000000000000000000000000..ccee17fb71b1eddac8d7d9d32af7f5a87753ff51 --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/srlrni.s @@ -0,0 +1,20 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvsrlrni.b.h $xr10, $xr17, 12 +# CHECK-INST: xvsrlrni.b.h $xr10, $xr17, 12 +# CHECK-ENCODING: encoding: [0x2a,0x72,0x44,0x77] + +xvsrlrni.h.w $xr22, $xr23, 13 +# CHECK-INST: xvsrlrni.h.w $xr22, $xr23, 13 +# CHECK-ENCODING: encoding: [0xf6,0xb6,0x44,0x77] + +xvsrlrni.w.d $xr18, $xr22, 58 +# CHECK-INST: xvsrlrni.w.d $xr18, $xr22, 58 +# CHECK-ENCODING: encoding: [0xd2,0xea,0x45,0x77] + +xvsrlrni.d.q $xr25, $xr8, 42 +# CHECK-INST: xvsrlrni.d.q $xr25, $xr8, 42 +# CHECK-ENCODING: encoding: [0x19,0xa9,0x46,0x77] diff --git a/llvm/test/MC/LoongArch/lasx/ssran.s b/llvm/test/MC/LoongArch/lasx/ssran.s new file mode 100644 index 0000000000000000000000000000000000000000..fe6333e520d54249ef7ca47615121196cfa24c1d --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/ssran.s @@ -0,0 +1,28 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvssran.b.h $xr17, $xr4, $xr1 +# CHECK-INST: xvssran.b.h $xr17, $xr4, $xr1 +# CHECK-ENCODING: encoding: [0x91,0x84,0xfe,0x74] + +xvssran.h.w $xr28, $xr28, $xr13 +# CHECK-INST: xvssran.h.w $xr28, $xr28, $xr13 +# CHECK-ENCODING: encoding: [0x9c,0x37,0xff,0x74] + +xvssran.w.d $xr21, $xr1, $xr31 +# CHECK-INST: xvssran.w.d $xr21, $xr1, $xr31 +# CHECK-ENCODING: encoding: [0x35,0xfc,0xff,0x74] + +xvssran.bu.h $xr3, $xr12, $xr24 +# CHECK-INST: xvssran.bu.h $xr3, $xr12, $xr24 +# CHECK-ENCODING: encoding: [0x83,0xe1,0x06,0x75] + +xvssran.hu.w $xr25, $xr24, $xr1 +# CHECK-INST: xvssran.hu.w $xr25, $xr24, $xr1 +# CHECK-ENCODING: encoding: [0x19,0x07,0x07,0x75] + +xvssran.wu.d $xr30, $xr14, $xr10 +# CHECK-INST: xvssran.wu.d $xr30, $xr14, $xr10 +# CHECK-ENCODING: encoding: [0xde,0xa9,0x07,0x75] diff --git a/llvm/test/MC/LoongArch/lasx/ssrani.s b/llvm/test/MC/LoongArch/lasx/ssrani.s new file mode 100644 index 0000000000000000000000000000000000000000..0074b3141558d2abbc7e4e8913b5f569878a2334 --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/ssrani.s @@ -0,0 +1,36 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvssrani.b.h $xr26, $xr22, 14 +# CHECK-INST: xvssrani.b.h $xr26, $xr22, 14 +# CHECK-ENCODING: encoding: [0xda,0x7a,0x60,0x77] + +xvssrani.h.w $xr19, $xr14, 26 +# CHECK-INST: xvssrani.h.w $xr19, $xr14, 26 +# CHECK-ENCODING: encoding: [0xd3,0xe9,0x60,0x77] + +xvssrani.w.d $xr1, $xr27, 27 +# CHECK-INST: xvssrani.w.d $xr1, $xr27, 27 +# CHECK-ENCODING: encoding: [0x61,0x6f,0x61,0x77] + +xvssrani.d.q $xr9, $xr10, 59 +# CHECK-INST: xvssrani.d.q $xr9, $xr10, 59 +# CHECK-ENCODING: encoding: [0x49,0xed,0x62,0x77] + +xvssrani.bu.h $xr6, $xr3, 10 +# CHECK-INST: xvssrani.bu.h $xr6, $xr3, 10 +# CHECK-ENCODING: encoding: [0x66,0x68,0x64,0x77] + +xvssrani.hu.w $xr20, $xr9, 6 +# CHECK-INST: xvssrani.hu.w $xr20, $xr9, 6 +# CHECK-ENCODING: encoding: [0x34,0x99,0x64,0x77] + +xvssrani.wu.d $xr24, $xr11, 
8 +# CHECK-INST: xvssrani.wu.d $xr24, $xr11, 8 +# CHECK-ENCODING: encoding: [0x78,0x21,0x65,0x77] + +xvssrani.du.q $xr16, $xr2, 15 +# CHECK-INST: xvssrani.du.q $xr16, $xr2, 15 +# CHECK-ENCODING: encoding: [0x50,0x3c,0x66,0x77] diff --git a/llvm/test/MC/LoongArch/lasx/ssrarn.s b/llvm/test/MC/LoongArch/lasx/ssrarn.s new file mode 100644 index 0000000000000000000000000000000000000000..6024c67647b64c32df182a84ccdd4c85d5a45667 --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/ssrarn.s @@ -0,0 +1,28 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvssrarn.b.h $xr7, $xr13, $xr0 +# CHECK-INST: xvssrarn.b.h $xr7, $xr13, $xr0 +# CHECK-ENCODING: encoding: [0xa7,0x81,0x02,0x75] + +xvssrarn.h.w $xr22, $xr2, $xr14 +# CHECK-INST: xvssrarn.h.w $xr22, $xr2, $xr14 +# CHECK-ENCODING: encoding: [0x56,0x38,0x03,0x75] + +xvssrarn.w.d $xr13, $xr7, $xr16 +# CHECK-INST: xvssrarn.w.d $xr13, $xr7, $xr16 +# CHECK-ENCODING: encoding: [0xed,0xc0,0x03,0x75] + +xvssrarn.bu.h $xr4, $xr12, $xr2 +# CHECK-INST: xvssrarn.bu.h $xr4, $xr12, $xr2 +# CHECK-ENCODING: encoding: [0x84,0x89,0x0a,0x75] + +xvssrarn.hu.w $xr15, $xr24, $xr3 +# CHECK-INST: xvssrarn.hu.w $xr15, $xr24, $xr3 +# CHECK-ENCODING: encoding: [0x0f,0x0f,0x0b,0x75] + +xvssrarn.wu.d $xr30, $xr9, $xr8 +# CHECK-INST: xvssrarn.wu.d $xr30, $xr9, $xr8 +# CHECK-ENCODING: encoding: [0x3e,0xa1,0x0b,0x75] diff --git a/llvm/test/MC/LoongArch/lasx/ssrarni.s b/llvm/test/MC/LoongArch/lasx/ssrarni.s new file mode 100644 index 0000000000000000000000000000000000000000..ee6cf98067d3c76e3ca2e3ef8f84a8f9661af53a --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/ssrarni.s @@ -0,0 +1,36 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvssrarni.b.h $xr0, $xr4, 13 +# CHECK-INST: xvssrarni.b.h $xr0, $xr4, 13 +# CHECK-ENCODING: encoding: [0x80,0x74,0x68,0x77] + +xvssrarni.h.w $xr8, $xr0, 9 +# CHECK-INST: xvssrarni.h.w $xr8, $xr0, 9 +# CHECK-ENCODING: encoding: [0x08,0xa4,0x68,0x77] + +xvssrarni.w.d $xr5, $xr5, 42 +# CHECK-INST: xvssrarni.w.d $xr5, $xr5, 42 +# CHECK-ENCODING: encoding: [0xa5,0xa8,0x69,0x77] + +xvssrarni.d.q $xr8, $xr31, 83 +# CHECK-INST: xvssrarni.d.q $xr8, $xr31, 83 +# CHECK-ENCODING: encoding: [0xe8,0x4f,0x6b,0x77] + +xvssrarni.bu.h $xr21, $xr19, 0 +# CHECK-INST: xvssrarni.bu.h $xr21, $xr19, 0 +# CHECK-ENCODING: encoding: [0x75,0x42,0x6c,0x77] + +xvssrarni.hu.w $xr22, $xr13, 1 +# CHECK-INST: xvssrarni.hu.w $xr22, $xr13, 1 +# CHECK-ENCODING: encoding: [0xb6,0x85,0x6c,0x77] + +xvssrarni.wu.d $xr21, $xr5, 26 +# CHECK-INST: xvssrarni.wu.d $xr21, $xr5, 26 +# CHECK-ENCODING: encoding: [0xb5,0x68,0x6d,0x77] + +xvssrarni.du.q $xr15, $xr14, 94 +# CHECK-INST: xvssrarni.du.q $xr15, $xr14, 94 +# CHECK-ENCODING: encoding: [0xcf,0x79,0x6f,0x77] diff --git a/llvm/test/MC/LoongArch/lasx/ssrln.s b/llvm/test/MC/LoongArch/lasx/ssrln.s new file mode 100644 index 0000000000000000000000000000000000000000..7df61781907129284b098f78657bf43851aee98d --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/ssrln.s @@ -0,0 +1,28 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj 
%s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvssrln.b.h $xr24, $xr4, $xr4 +# CHECK-INST: xvssrln.b.h $xr24, $xr4, $xr4 +# CHECK-ENCODING: encoding: [0x98,0x90,0xfc,0x74] + +xvssrln.h.w $xr5, $xr15, $xr0 +# CHECK-INST: xvssrln.h.w $xr5, $xr15, $xr0 +# CHECK-ENCODING: encoding: [0xe5,0x01,0xfd,0x74] + +xvssrln.w.d $xr0, $xr25, $xr30 +# CHECK-INST: xvssrln.w.d $xr0, $xr25, $xr30 +# CHECK-ENCODING: encoding: [0x20,0xfb,0xfd,0x74] + +xvssrln.bu.h $xr26, $xr9, $xr26 +# CHECK-INST: xvssrln.bu.h $xr26, $xr9, $xr26 +# CHECK-ENCODING: encoding: [0x3a,0xe9,0x04,0x75] + +xvssrln.hu.w $xr7, $xr20, $xr1 +# CHECK-INST: xvssrln.hu.w $xr7, $xr20, $xr1 +# CHECK-ENCODING: encoding: [0x87,0x06,0x05,0x75] + +xvssrln.wu.d $xr15, $xr13, $xr20 +# CHECK-INST: xvssrln.wu.d $xr15, $xr13, $xr20 +# CHECK-ENCODING: encoding: [0xaf,0xd1,0x05,0x75] diff --git a/llvm/test/MC/LoongArch/lasx/ssrlni.s b/llvm/test/MC/LoongArch/lasx/ssrlni.s new file mode 100644 index 0000000000000000000000000000000000000000..b8c6ca1710e1c0614ba448a14e48a2e458e7cef3 --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/ssrlni.s @@ -0,0 +1,36 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvssrlni.b.h $xr19, $xr18, 9 +# CHECK-INST: xvssrlni.b.h $xr19, $xr18, 9 +# CHECK-ENCODING: encoding: [0x53,0x66,0x48,0x77] + +xvssrlni.h.w $xr29, $xr29, 3 +# CHECK-INST: xvssrlni.h.w $xr29, $xr29, 3 +# CHECK-ENCODING: encoding: [0xbd,0x8f,0x48,0x77] + +xvssrlni.w.d $xr9, $xr15, 43 +# CHECK-INST: xvssrlni.w.d $xr9, $xr15, 43 +# CHECK-ENCODING: encoding: [0xe9,0xad,0x49,0x77] + +xvssrlni.d.q $xr8, $xr11, 121 +# CHECK-INST: xvssrlni.d.q $xr8, $xr11, 121 +# CHECK-ENCODING: encoding: [0x68,0xe5,0x4b,0x77] + +xvssrlni.bu.h $xr25, $xr10, 5 +# CHECK-INST: xvssrlni.bu.h $xr25, $xr10, 5 +# CHECK-ENCODING: encoding: [0x59,0x55,0x4c,0x77] + +xvssrlni.hu.w $xr9, $xr18, 26 +# CHECK-INST: xvssrlni.hu.w $xr9, $xr18, 26 +# CHECK-ENCODING: encoding: [0x49,0xea,0x4c,0x77] + +xvssrlni.wu.d $xr20, $xr22, 13 +# CHECK-INST: xvssrlni.wu.d $xr20, $xr22, 13 +# CHECK-ENCODING: encoding: [0xd4,0x36,0x4d,0x77] + +xvssrlni.du.q $xr8, $xr4, 43 +# CHECK-INST: xvssrlni.du.q $xr8, $xr4, 43 +# CHECK-ENCODING: encoding: [0x88,0xac,0x4e,0x77] diff --git a/llvm/test/MC/LoongArch/lasx/ssrlrn.s b/llvm/test/MC/LoongArch/lasx/ssrlrn.s new file mode 100644 index 0000000000000000000000000000000000000000..410a28deb5cf0d083284bc01c661fb69e4930eca --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/ssrlrn.s @@ -0,0 +1,28 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvssrlrn.b.h $xr8, $xr20, $xr18 +# CHECK-INST: xvssrlrn.b.h $xr8, $xr20, $xr18 +# CHECK-ENCODING: encoding: [0x88,0xca,0x00,0x75] + +xvssrlrn.h.w $xr2, $xr13, $xr19 +# CHECK-INST: xvssrlrn.h.w $xr2, $xr13, $xr19 +# CHECK-ENCODING: encoding: [0xa2,0x4d,0x01,0x75] + +xvssrlrn.w.d $xr24, $xr7, $xr5 +# CHECK-INST: xvssrlrn.w.d $xr24, $xr7, $xr5 +# CHECK-ENCODING: encoding: [0xf8,0x94,0x01,0x75] + +xvssrlrn.bu.h $xr15, $xr23, $xr18 +# CHECK-INST: xvssrlrn.bu.h $xr15, $xr23, $xr18 +# CHECK-ENCODING: encoding: [0xef,0xca,0x08,0x75] + +xvssrlrn.hu.w $xr22, $xr14, $xr16 +# CHECK-INST: xvssrlrn.hu.w $xr22, $xr14, $xr16 
+# CHECK-ENCODING: encoding: [0xd6,0x41,0x09,0x75] + +xvssrlrn.wu.d $xr20, $xr28, $xr5 +# CHECK-INST: xvssrlrn.wu.d $xr20, $xr28, $xr5 +# CHECK-ENCODING: encoding: [0x94,0x97,0x09,0x75] diff --git a/llvm/test/MC/LoongArch/lasx/ssrlrni.s b/llvm/test/MC/LoongArch/lasx/ssrlrni.s new file mode 100644 index 0000000000000000000000000000000000000000..c19626e547169043ddc0b4f4e160f9c3d5463c87 --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/ssrlrni.s @@ -0,0 +1,36 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvssrlrni.b.h $xr26, $xr26, 8 +# CHECK-INST: xvssrlrni.b.h $xr26, $xr26, 8 +# CHECK-ENCODING: encoding: [0x5a,0x63,0x50,0x77] + +xvssrlrni.h.w $xr6, $xr0, 19 +# CHECK-INST: xvssrlrni.h.w $xr6, $xr0, 19 +# CHECK-ENCODING: encoding: [0x06,0xcc,0x50,0x77] + +xvssrlrni.w.d $xr28, $xr15, 55 +# CHECK-INST: xvssrlrni.w.d $xr28, $xr15, 55 +# CHECK-ENCODING: encoding: [0xfc,0xdd,0x51,0x77] + +xvssrlrni.d.q $xr8, $xr16, 64 +# CHECK-INST: xvssrlrni.d.q $xr8, $xr16, 64 +# CHECK-ENCODING: encoding: [0x08,0x02,0x53,0x77] + +xvssrlrni.bu.h $xr23, $xr28, 3 +# CHECK-INST: xvssrlrni.bu.h $xr23, $xr28, 3 +# CHECK-ENCODING: encoding: [0x97,0x4f,0x54,0x77] + +xvssrlrni.hu.w $xr25, $xr10, 18 +# CHECK-INST: xvssrlrni.hu.w $xr25, $xr10, 18 +# CHECK-ENCODING: encoding: [0x59,0xc9,0x54,0x77] + +xvssrlrni.wu.d $xr16, $xr28, 15 +# CHECK-INST: xvssrlrni.wu.d $xr16, $xr28, 15 +# CHECK-ENCODING: encoding: [0x90,0x3f,0x55,0x77] + +xvssrlrni.du.q $xr18, $xr9, 44 +# CHECK-INST: xvssrlrni.du.q $xr18, $xr9, 44 +# CHECK-ENCODING: encoding: [0x32,0xb1,0x56,0x77] diff --git a/llvm/test/MC/LoongArch/lasx/ssub.s b/llvm/test/MC/LoongArch/lasx/ssub.s new file mode 100644 index 0000000000000000000000000000000000000000..e5be5652bcc98ef39efd07f10d673226fc60c199 --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/ssub.s @@ -0,0 +1,36 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvssub.b $xr14, $xr19, $xr24 +# CHECK-INST: xvssub.b $xr14, $xr19, $xr24 +# CHECK-ENCODING: encoding: [0x6e,0x62,0x48,0x74] + +xvssub.h $xr13, $xr8, $xr19 +# CHECK-INST: xvssub.h $xr13, $xr8, $xr19 +# CHECK-ENCODING: encoding: [0x0d,0xcd,0x48,0x74] + +xvssub.w $xr28, $xr27, $xr28 +# CHECK-INST: xvssub.w $xr28, $xr27, $xr28 +# CHECK-ENCODING: encoding: [0x7c,0x73,0x49,0x74] + +xvssub.d $xr28, $xr16, $xr2 +# CHECK-INST: xvssub.d $xr28, $xr16, $xr2 +# CHECK-ENCODING: encoding: [0x1c,0x8a,0x49,0x74] + +xvssub.bu $xr11, $xr13, $xr17 +# CHECK-INST: xvssub.bu $xr11, $xr13, $xr17 +# CHECK-ENCODING: encoding: [0xab,0x45,0x4c,0x74] + +xvssub.hu $xr16, $xr10, $xr28 +# CHECK-INST: xvssub.hu $xr16, $xr10, $xr28 +# CHECK-ENCODING: encoding: [0x50,0xf1,0x4c,0x74] + +xvssub.wu $xr21, $xr0, $xr13 +# CHECK-INST: xvssub.wu $xr21, $xr0, $xr13 +# CHECK-ENCODING: encoding: [0x15,0x34,0x4d,0x74] + +xvssub.du $xr18, $xr26, $xr27 +# CHECK-INST: xvssub.du $xr18, $xr26, $xr27 +# CHECK-ENCODING: encoding: [0x52,0xef,0x4d,0x74] diff --git a/llvm/test/MC/LoongArch/lasx/st.s b/llvm/test/MC/LoongArch/lasx/st.s new file mode 100644 index 0000000000000000000000000000000000000000..d1437e3ec42f543b86565ace7c3c37aead3493be --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/st.s @@ -0,0 +1,12 @@ +# 
RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvst $xr14, $r12, 943 +# CHECK-INST: xvst $xr14, $t0, 943 +# CHECK-ENCODING: encoding: [0x8e,0xbd,0xce,0x2c] + +xvstx $xr7, $r9, $r21 +# CHECK-INST: xvstx $xr7, $a5, $r21 +# CHECK-ENCODING: encoding: [0x27,0x55,0x4c,0x38] diff --git a/llvm/test/MC/LoongArch/lasx/stelm.s b/llvm/test/MC/LoongArch/lasx/stelm.s new file mode 100644 index 0000000000000000000000000000000000000000..e19030a004761082de91b2e2ebcd6bf8578ffcb6 --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/stelm.s @@ -0,0 +1,20 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvstelm.b $xr20, $r2, -105, 10 +# CHECK-INST: xvstelm.b $xr20, $tp, -105, 10 +# CHECK-ENCODING: encoding: [0x54,0x5c,0xaa,0x33] + +xvstelm.h $xr8, $r1, 160, 4 +# CHECK-INST: xvstelm.h $xr8, $ra, 160, 4 +# CHECK-ENCODING: encoding: [0x28,0x40,0x51,0x33] + +xvstelm.w $xr19, $r18, 412, 0 +# CHECK-INST: xvstelm.w $xr19, $t6, 412, 0 +# CHECK-ENCODING: encoding: [0x53,0x9e,0x21,0x33] + +xvstelm.d $xr22, $r30, 960, 3 +# CHECK-INST: xvstelm.d $xr22, $s7, 960, 3 +# CHECK-ENCODING: encoding: [0xd6,0xe3,0x1d,0x33] diff --git a/llvm/test/MC/LoongArch/lasx/sub.s b/llvm/test/MC/LoongArch/lasx/sub.s new file mode 100644 index 0000000000000000000000000000000000000000..e100730d14bb6711dbce1c18802fbfcd8027f44e --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/sub.s @@ -0,0 +1,24 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvsub.b $xr11, $xr28, $xr16 +# CHECK-INST: xvsub.b $xr11, $xr28, $xr16 +# CHECK-ENCODING: encoding: [0x8b,0x43,0x0c,0x74] + +xvsub.h $xr11, $xr3, $xr24 +# CHECK-INST: xvsub.h $xr11, $xr3, $xr24 +# CHECK-ENCODING: encoding: [0x6b,0xe0,0x0c,0x74] + +xvsub.w $xr14, $xr23, $xr6 +# CHECK-INST: xvsub.w $xr14, $xr23, $xr6 +# CHECK-ENCODING: encoding: [0xee,0x1a,0x0d,0x74] + +xvsub.d $xr5, $xr13, $xr7 +# CHECK-INST: xvsub.d $xr5, $xr13, $xr7 +# CHECK-ENCODING: encoding: [0xa5,0x9d,0x0d,0x74] + +xvsub.q $xr13, $xr26, $xr31 +# CHECK-INST: xvsub.q $xr13, $xr26, $xr31 +# CHECK-ENCODING: encoding: [0x4d,0xff,0x2d,0x75] diff --git a/llvm/test/MC/LoongArch/lasx/subi.s b/llvm/test/MC/LoongArch/lasx/subi.s new file mode 100644 index 0000000000000000000000000000000000000000..921fcf992595da7f48a87dcb784bfa55c66fe2cf --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/subi.s @@ -0,0 +1,20 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvsubi.bu $xr18, $xr27, 1 +# CHECK-INST: xvsubi.bu $xr18, $xr27, 1 +# CHECK-ENCODING: encoding: [0x72,0x07,0x8c,0x76] + +xvsubi.hu $xr6, $xr23, 19 +# CHECK-INST: xvsubi.hu $xr6, $xr23, 19 +# CHECK-ENCODING: encoding: [0xe6,0xce,0x8c,0x76] + +xvsubi.wu $xr13, $xr3, 5 +# CHECK-INST: xvsubi.wu $xr13, $xr3, 5 +# CHECK-ENCODING: encoding: [0x6d,0x14,0x8d,0x76] + +xvsubi.du $xr26, $xr28, 14 +# 
CHECK-INST: xvsubi.du $xr26, $xr28, 14 +# CHECK-ENCODING: encoding: [0x9a,0xbb,0x8d,0x76] diff --git a/llvm/test/MC/LoongArch/lasx/subw.s b/llvm/test/MC/LoongArch/lasx/subw.s new file mode 100644 index 0000000000000000000000000000000000000000..666edfdae3b25c6d6dd4b71e1bcf823d3142f404 --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/subw.s @@ -0,0 +1,68 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvsubwev.h.b $xr29, $xr1, $xr28 +# CHECK-INST: xvsubwev.h.b $xr29, $xr1, $xr28 +# CHECK-ENCODING: encoding: [0x3d,0x70,0x20,0x74] + +xvsubwev.w.h $xr24, $xr20, $xr31 +# CHECK-INST: xvsubwev.w.h $xr24, $xr20, $xr31 +# CHECK-ENCODING: encoding: [0x98,0xfe,0x20,0x74] + +xvsubwev.d.w $xr6, $xr4, $xr11 +# CHECK-INST: xvsubwev.d.w $xr6, $xr4, $xr11 +# CHECK-ENCODING: encoding: [0x86,0x2c,0x21,0x74] + +xvsubwev.q.d $xr27, $xr31, $xr13 +# CHECK-INST: xvsubwev.q.d $xr27, $xr31, $xr13 +# CHECK-ENCODING: encoding: [0xfb,0xb7,0x21,0x74] + +xvsubwev.h.bu $xr1, $xr20, $xr2 +# CHECK-INST: xvsubwev.h.bu $xr1, $xr20, $xr2 +# CHECK-ENCODING: encoding: [0x81,0x0a,0x30,0x74] + +xvsubwev.w.hu $xr19, $xr6, $xr12 +# CHECK-INST: xvsubwev.w.hu $xr19, $xr6, $xr12 +# CHECK-ENCODING: encoding: [0xd3,0xb0,0x30,0x74] + +xvsubwev.d.wu $xr31, $xr1, $xr23 +# CHECK-INST: xvsubwev.d.wu $xr31, $xr1, $xr23 +# CHECK-ENCODING: encoding: [0x3f,0x5c,0x31,0x74] + +xvsubwev.q.du $xr31, $xr28, $xr17 +# CHECK-INST: xvsubwev.q.du $xr31, $xr28, $xr17 +# CHECK-ENCODING: encoding: [0x9f,0xc7,0x31,0x74] + +xvsubwod.h.b $xr3, $xr9, $xr17 +# CHECK-INST: xvsubwod.h.b $xr3, $xr9, $xr17 +# CHECK-ENCODING: encoding: [0x23,0x45,0x24,0x74] + +xvsubwod.w.h $xr14, $xr5, $xr21 +# CHECK-INST: xvsubwod.w.h $xr14, $xr5, $xr21 +# CHECK-ENCODING: encoding: [0xae,0xd4,0x24,0x74] + +xvsubwod.d.w $xr8, $xr14, $xr3 +# CHECK-INST: xvsubwod.d.w $xr8, $xr14, $xr3 +# CHECK-ENCODING: encoding: [0xc8,0x0d,0x25,0x74] + +xvsubwod.q.d $xr24, $xr15, $xr18 +# CHECK-INST: xvsubwod.q.d $xr24, $xr15, $xr18 +# CHECK-ENCODING: encoding: [0xf8,0xc9,0x25,0x74] + +xvsubwod.h.bu $xr27, $xr2, $xr1 +# CHECK-INST: xvsubwod.h.bu $xr27, $xr2, $xr1 +# CHECK-ENCODING: encoding: [0x5b,0x04,0x34,0x74] + +xvsubwod.w.hu $xr19, $xr7, $xr22 +# CHECK-INST: xvsubwod.w.hu $xr19, $xr7, $xr22 +# CHECK-ENCODING: encoding: [0xf3,0xd8,0x34,0x74] + +xvsubwod.d.wu $xr1, $xr24, $xr26 +# CHECK-INST: xvsubwod.d.wu $xr1, $xr24, $xr26 +# CHECK-ENCODING: encoding: [0x01,0x6b,0x35,0x74] + +xvsubwod.q.du $xr29, $xr26, $xr7 +# CHECK-INST: xvsubwod.q.du $xr29, $xr26, $xr7 +# CHECK-ENCODING: encoding: [0x5d,0x9f,0x35,0x74] diff --git a/llvm/test/MC/LoongArch/lasx/xor.s b/llvm/test/MC/LoongArch/lasx/xor.s new file mode 100644 index 0000000000000000000000000000000000000000..511ad49b12d5b3e9abe3a0370687dc4ae71ceafa --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/xor.s @@ -0,0 +1,8 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvxor.v $xr14, $xr26, $xr10 +# CHECK-INST: xvxor.v $xr14, $xr26, $xr10 +# CHECK-ENCODING: encoding: [0x4e,0x2b,0x27,0x75] diff --git a/llvm/test/MC/LoongArch/lasx/xori.s b/llvm/test/MC/LoongArch/lasx/xori.s new file mode 100644 index 
0000000000000000000000000000000000000000..e11dc0c81ccd1976e65493411c06623354223baf --- /dev/null +++ b/llvm/test/MC/LoongArch/lasx/xori.s @@ -0,0 +1,8 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +xvxori.b $xr26, $xr8, 149 +# CHECK-INST: xvxori.b $xr26, $xr8, 149 +# CHECK-ENCODING: encoding: [0x1a,0x55,0xda,0x77] diff --git a/llvm/test/MC/LoongArch/lsx/absd.s b/llvm/test/MC/LoongArch/lsx/absd.s new file mode 100644 index 0000000000000000000000000000000000000000..b203cab285743a7ec2522c97239928a6f47f99d4 --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/absd.s @@ -0,0 +1,36 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vabsd.b $vr14, $vr15, $vr12 +# CHECK-INST: vabsd.b $vr14, $vr15, $vr12 +# CHECK-ENCODING: encoding: [0xee,0x31,0x60,0x70] + +vabsd.h $vr7, $vr13, $vr10 +# CHECK-INST: vabsd.h $vr7, $vr13, $vr10 +# CHECK-ENCODING: encoding: [0xa7,0xa9,0x60,0x70] + +vabsd.w $vr5, $vr28, $vr29 +# CHECK-INST: vabsd.w $vr5, $vr28, $vr29 +# CHECK-ENCODING: encoding: [0x85,0x77,0x61,0x70] + +vabsd.d $vr7, $vr25, $vr5 +# CHECK-INST: vabsd.d $vr7, $vr25, $vr5 +# CHECK-ENCODING: encoding: [0x27,0x97,0x61,0x70] + +vabsd.bu $vr22, $vr16, $vr21 +# CHECK-INST: vabsd.bu $vr22, $vr16, $vr21 +# CHECK-ENCODING: encoding: [0x16,0x56,0x62,0x70] + +vabsd.hu $vr7, $vr29, $vr8 +# CHECK-INST: vabsd.hu $vr7, $vr29, $vr8 +# CHECK-ENCODING: encoding: [0xa7,0xa3,0x62,0x70] + +vabsd.wu $vr19, $vr31, $vr16 +# CHECK-INST: vabsd.wu $vr19, $vr31, $vr16 +# CHECK-ENCODING: encoding: [0xf3,0x43,0x63,0x70] + +vabsd.du $vr29, $vr31, $vr17 +# CHECK-INST: vabsd.du $vr29, $vr31, $vr17 +# CHECK-ENCODING: encoding: [0xfd,0xc7,0x63,0x70] diff --git a/llvm/test/MC/LoongArch/lsx/add.s b/llvm/test/MC/LoongArch/lsx/add.s new file mode 100644 index 0000000000000000000000000000000000000000..fbc6c0fd5dd7fa21d0339066f4308b47d17069d1 --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/add.s @@ -0,0 +1,24 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vadd.b $vr11, $vr12, $vr8 +# CHECK-INST: vadd.b $vr11, $vr12, $vr8 +# CHECK-ENCODING: encoding: [0x8b,0x21,0x0a,0x70] + +vadd.h $vr22, $vr3, $vr4 +# CHECK-INST: vadd.h $vr22, $vr3, $vr4 +# CHECK-ENCODING: encoding: [0x76,0x90,0x0a,0x70] + +vadd.w $vr13, $vr16, $vr6 +# CHECK-INST: vadd.w $vr13, $vr16, $vr6 +# CHECK-ENCODING: encoding: [0x0d,0x1a,0x0b,0x70] + +vadd.d $vr12, $vr9, $vr3 +# CHECK-INST: vadd.d $vr12, $vr9, $vr3 +# CHECK-ENCODING: encoding: [0x2c,0x8d,0x0b,0x70] + +vadd.q $vr16, $vr15, $vr10 +# CHECK-INST: vadd.q $vr16, $vr15, $vr10 +# CHECK-ENCODING: encoding: [0xf0,0x29,0x2d,0x71] diff --git a/llvm/test/MC/LoongArch/lsx/adda.s b/llvm/test/MC/LoongArch/lsx/adda.s new file mode 100644 index 0000000000000000000000000000000000000000..31f073b055ecebec4060a3fe20cb1da7142f956d --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/adda.s @@ -0,0 +1,20 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc 
--triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vadda.b $vr7, $vr14, $vr21 +# CHECK-INST: vadda.b $vr7, $vr14, $vr21 +# CHECK-ENCODING: encoding: [0xc7,0x55,0x5c,0x70] + +vadda.h $vr19, $vr29, $vr2 +# CHECK-INST: vadda.h $vr19, $vr29, $vr2 +# CHECK-ENCODING: encoding: [0xb3,0x8b,0x5c,0x70] + +vadda.w $vr2, $vr23, $vr17 +# CHECK-INST: vadda.w $vr2, $vr23, $vr17 +# CHECK-ENCODING: encoding: [0xe2,0x46,0x5d,0x70] + +vadda.d $vr13, $vr18, $vr24 +# CHECK-INST: vadda.d $vr13, $vr18, $vr24 +# CHECK-ENCODING: encoding: [0x4d,0xe2,0x5d,0x70] diff --git a/llvm/test/MC/LoongArch/lsx/addi.s b/llvm/test/MC/LoongArch/lsx/addi.s new file mode 100644 index 0000000000000000000000000000000000000000..0e5795050d4ea0d7cac3bdb89de8565425a40bb3 --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/addi.s @@ -0,0 +1,20 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vaddi.bu $vr14, $vr3, 2 +# CHECK-INST: vaddi.bu $vr14, $vr3, 2 +# CHECK-ENCODING: encoding: [0x6e,0x08,0x8a,0x72] + +vaddi.hu $vr30, $vr27, 21 +# CHECK-INST: vaddi.hu $vr30, $vr27, 21 +# CHECK-ENCODING: encoding: [0x7e,0xd7,0x8a,0x72] + +vaddi.wu $vr16, $vr28, 27 +# CHECK-INST: vaddi.wu $vr16, $vr28, 27 +# CHECK-ENCODING: encoding: [0x90,0x6f,0x8b,0x72] + +vaddi.du $vr15, $vr8, 24 +# CHECK-INST: vaddi.du $vr15, $vr8, 24 +# CHECK-ENCODING: encoding: [0x0f,0xe1,0x8b,0x72] diff --git a/llvm/test/MC/LoongArch/lsx/addw.s b/llvm/test/MC/LoongArch/lsx/addw.s new file mode 100644 index 0000000000000000000000000000000000000000..fbcc898da803d6f738a90d8336029d3fe19ef462 --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/addw.s @@ -0,0 +1,100 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vaddwev.h.b $vr2, $vr23, $vr25 +# CHECK-INST: vaddwev.h.b $vr2, $vr23, $vr25 +# CHECK-ENCODING: encoding: [0xe2,0x66,0x1e,0x70] + +vaddwev.w.h $vr4, $vr8, $vr30 +# CHECK-INST: vaddwev.w.h $vr4, $vr8, $vr30 +# CHECK-ENCODING: encoding: [0x04,0xf9,0x1e,0x70] + +vaddwev.d.w $vr8, $vr31, $vr5 +# CHECK-INST: vaddwev.d.w $vr8, $vr31, $vr5 +# CHECK-ENCODING: encoding: [0xe8,0x17,0x1f,0x70] + +vaddwev.q.d $vr10, $vr10, $vr13 +# CHECK-INST: vaddwev.q.d $vr10, $vr10, $vr13 +# CHECK-ENCODING: encoding: [0x4a,0xb5,0x1f,0x70] + +vaddwev.h.bu $vr12, $vr24, $vr25 +# CHECK-INST: vaddwev.h.bu $vr12, $vr24, $vr25 +# CHECK-ENCODING: encoding: [0x0c,0x67,0x2e,0x70] + +vaddwev.w.hu $vr3, $vr9, $vr30 +# CHECK-INST: vaddwev.w.hu $vr3, $vr9, $vr30 +# CHECK-ENCODING: encoding: [0x23,0xf9,0x2e,0x70] + +vaddwev.d.wu $vr27, $vr10, $vr17 +# CHECK-INST: vaddwev.d.wu $vr27, $vr10, $vr17 +# CHECK-ENCODING: encoding: [0x5b,0x45,0x2f,0x70] + +vaddwev.q.du $vr25, $vr20, $vr14 +# CHECK-INST: vaddwev.q.du $vr25, $vr20, $vr14 +# CHECK-ENCODING: encoding: [0x99,0xba,0x2f,0x70] + +vaddwev.h.bu.b $vr5, $vr7, $vr16 +# CHECK-INST: vaddwev.h.bu.b $vr5, $vr7, $vr16 +# CHECK-ENCODING: encoding: [0xe5,0x40,0x3e,0x70] + +vaddwev.w.hu.h $vr15, $vr13, $vr29 +# CHECK-INST: vaddwev.w.hu.h $vr15, $vr13, $vr29 +# CHECK-ENCODING: encoding: [0xaf,0xf5,0x3e,0x70] + +vaddwev.d.wu.w $vr2, $vr6, $vr8 +# CHECK-INST: vaddwev.d.wu.w $vr2, $vr6, $vr8 +# CHECK-ENCODING: encoding: 
[0xc2,0x20,0x3f,0x70] + +vaddwev.q.du.d $vr19, $vr1, $vr12 +# CHECK-INST: vaddwev.q.du.d $vr19, $vr1, $vr12 +# CHECK-ENCODING: encoding: [0x33,0xb0,0x3f,0x70] + +vaddwod.h.b $vr31, $vr6, $vr9 +# CHECK-INST: vaddwod.h.b $vr31, $vr6, $vr9 +# CHECK-ENCODING: encoding: [0xdf,0x24,0x22,0x70] + +vaddwod.w.h $vr17, $vr31, $vr2 +# CHECK-INST: vaddwod.w.h $vr17, $vr31, $vr2 +# CHECK-ENCODING: encoding: [0xf1,0x8b,0x22,0x70] + +vaddwod.d.w $vr11, $vr15, $vr27 +# CHECK-INST: vaddwod.d.w $vr11, $vr15, $vr27 +# CHECK-ENCODING: encoding: [0xeb,0x6d,0x23,0x70] + +vaddwod.q.d $vr0, $vr26, $vr17 +# CHECK-INST: vaddwod.q.d $vr0, $vr26, $vr17 +# CHECK-ENCODING: encoding: [0x40,0xc7,0x23,0x70] + +vaddwod.h.bu $vr30, $vr15, $vr10 +# CHECK-INST: vaddwod.h.bu $vr30, $vr15, $vr10 +# CHECK-ENCODING: encoding: [0xfe,0x29,0x32,0x70] + +vaddwod.w.hu $vr24, $vr22, $vr1 +# CHECK-INST: vaddwod.w.hu $vr24, $vr22, $vr1 +# CHECK-ENCODING: encoding: [0xd8,0x86,0x32,0x70] + +vaddwod.d.wu $vr10, $vr25, $vr13 +# CHECK-INST: vaddwod.d.wu $vr10, $vr25, $vr13 +# CHECK-ENCODING: encoding: [0x2a,0x37,0x33,0x70] + +vaddwod.q.du $vr16, $vr23, $vr21 +# CHECK-INST: vaddwod.q.du $vr16, $vr23, $vr21 +# CHECK-ENCODING: encoding: [0xf0,0xd6,0x33,0x70] + +vaddwod.h.bu.b $vr30, $vr15, $vr2 +# CHECK-INST: vaddwod.h.bu.b $vr30, $vr15, $vr2 +# CHECK-ENCODING: encoding: [0xfe,0x09,0x40,0x70] + +vaddwod.w.hu.h $vr24, $vr30, $vr13 +# CHECK-INST: vaddwod.w.hu.h $vr24, $vr30, $vr13 +# CHECK-ENCODING: encoding: [0xd8,0xb7,0x40,0x70] + +vaddwod.d.wu.w $vr10, $vr26, $vr9 +# CHECK-INST: vaddwod.d.wu.w $vr10, $vr26, $vr9 +# CHECK-ENCODING: encoding: [0x4a,0x27,0x41,0x70] + +vaddwod.q.du.d $vr20, $vr9, $vr16 +# CHECK-INST: vaddwod.q.du.d $vr20, $vr9, $vr16 +# CHECK-ENCODING: encoding: [0x34,0xc1,0x41,0x70] diff --git a/llvm/test/MC/LoongArch/lsx/and.s b/llvm/test/MC/LoongArch/lsx/and.s new file mode 100644 index 0000000000000000000000000000000000000000..f9688c39ff27c336fe2e1532db3902ebdf0fcbf0 --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/and.s @@ -0,0 +1,8 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vand.v $vr27, $vr30, $vr18 +# CHECK-INST: vand.v $vr27, $vr30, $vr18 +# CHECK-ENCODING: encoding: [0xdb,0x4b,0x26,0x71] diff --git a/llvm/test/MC/LoongArch/lsx/andi.s b/llvm/test/MC/LoongArch/lsx/andi.s new file mode 100644 index 0000000000000000000000000000000000000000..11732fbcbcfeab8f1af4dffc111530aadfa70d6e --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/andi.s @@ -0,0 +1,8 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vandi.b $vr10, $vr2, 181 +# CHECK-INST: vandi.b $vr10, $vr2, 181 +# CHECK-ENCODING: encoding: [0x4a,0xd4,0xd2,0x73] diff --git a/llvm/test/MC/LoongArch/lsx/andn.s b/llvm/test/MC/LoongArch/lsx/andn.s new file mode 100644 index 0000000000000000000000000000000000000000..f682313b54b8b28300d91c247adbec255b1067c7 --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/andn.s @@ -0,0 +1,8 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + 
+vandn.v $vr1, $vr26, $vr28 +# CHECK-INST: vandn.v $vr1, $vr26, $vr28 +# CHECK-ENCODING: encoding: [0x41,0x73,0x28,0x71] diff --git a/llvm/test/MC/LoongArch/lsx/avg.s b/llvm/test/MC/LoongArch/lsx/avg.s new file mode 100644 index 0000000000000000000000000000000000000000..31b3c4a0aa37eaa856efd6cab6e1b273928bd503 --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/avg.s @@ -0,0 +1,36 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vavg.b $vr13, $vr3, $vr24 +# CHECK-INST: vavg.b $vr13, $vr3, $vr24 +# CHECK-ENCODING: encoding: [0x6d,0x60,0x64,0x70] + +vavg.h $vr3, $vr6, $vr20 +# CHECK-INST: vavg.h $vr3, $vr6, $vr20 +# CHECK-ENCODING: encoding: [0xc3,0xd0,0x64,0x70] + +vavg.w $vr21, $vr7, $vr20 +# CHECK-INST: vavg.w $vr21, $vr7, $vr20 +# CHECK-ENCODING: encoding: [0xf5,0x50,0x65,0x70] + +vavg.d $vr6, $vr22, $vr23 +# CHECK-INST: vavg.d $vr6, $vr22, $vr23 +# CHECK-ENCODING: encoding: [0xc6,0xde,0x65,0x70] + +vavg.bu $vr13, $vr30, $vr16 +# CHECK-INST: vavg.bu $vr13, $vr30, $vr16 +# CHECK-ENCODING: encoding: [0xcd,0x43,0x66,0x70] + +vavg.hu $vr0, $vr15, $vr23 +# CHECK-INST: vavg.hu $vr0, $vr15, $vr23 +# CHECK-ENCODING: encoding: [0xe0,0xdd,0x66,0x70] + +vavg.wu $vr0, $vr17, $vr9 +# CHECK-INST: vavg.wu $vr0, $vr17, $vr9 +# CHECK-ENCODING: encoding: [0x20,0x26,0x67,0x70] + +vavg.du $vr7, $vr22, $vr14 +# CHECK-INST: vavg.du $vr7, $vr22, $vr14 +# CHECK-ENCODING: encoding: [0xc7,0xba,0x67,0x70] diff --git a/llvm/test/MC/LoongArch/lsx/avgr.s b/llvm/test/MC/LoongArch/lsx/avgr.s new file mode 100644 index 0000000000000000000000000000000000000000..566feba6a130eca685f7bc28507b4c04d7266971 --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/avgr.s @@ -0,0 +1,36 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vavgr.b $vr22, $vr3, $vr9 +# CHECK-INST: vavgr.b $vr22, $vr3, $vr9 +# CHECK-ENCODING: encoding: [0x76,0x24,0x68,0x70] + +vavgr.h $vr12, $vr2, $vr6 +# CHECK-INST: vavgr.h $vr12, $vr2, $vr6 +# CHECK-ENCODING: encoding: [0x4c,0x98,0x68,0x70] + +vavgr.w $vr16, $vr30, $vr13 +# CHECK-INST: vavgr.w $vr16, $vr30, $vr13 +# CHECK-ENCODING: encoding: [0xd0,0x37,0x69,0x70] + +vavgr.d $vr5, $vr18, $vr7 +# CHECK-INST: vavgr.d $vr5, $vr18, $vr7 +# CHECK-ENCODING: encoding: [0x45,0x9e,0x69,0x70] + +vavgr.bu $vr22, $vr5, $vr29 +# CHECK-INST: vavgr.bu $vr22, $vr5, $vr29 +# CHECK-ENCODING: encoding: [0xb6,0x74,0x6a,0x70] + +vavgr.hu $vr22, $vr23, $vr8 +# CHECK-INST: vavgr.hu $vr22, $vr23, $vr8 +# CHECK-ENCODING: encoding: [0xf6,0xa2,0x6a,0x70] + +vavgr.wu $vr10, $vr20, $vr21 +# CHECK-INST: vavgr.wu $vr10, $vr20, $vr21 +# CHECK-ENCODING: encoding: [0x8a,0x56,0x6b,0x70] + +vavgr.du $vr10, $vr28, $vr13 +# CHECK-INST: vavgr.du $vr10, $vr28, $vr13 +# CHECK-ENCODING: encoding: [0x8a,0xb7,0x6b,0x70] diff --git a/llvm/test/MC/LoongArch/lsx/bitclr.s b/llvm/test/MC/LoongArch/lsx/bitclr.s new file mode 100644 index 0000000000000000000000000000000000000000..227f90c2a6f76bc76b04654b18ef1a24cda4be90 --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/bitclr.s @@ -0,0 +1,36 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 
--filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vbitclr.b $vr1, $vr0, $vr30 +# CHECK-INST: vbitclr.b $vr1, $vr0, $vr30 +# CHECK-ENCODING: encoding: [0x01,0x78,0x0c,0x71] + +vbitclr.h $vr27, $vr5, $vr28 +# CHECK-INST: vbitclr.h $vr27, $vr5, $vr28 +# CHECK-ENCODING: encoding: [0xbb,0xf0,0x0c,0x71] + +vbitclr.w $vr3, $vr30, $vr14 +# CHECK-INST: vbitclr.w $vr3, $vr30, $vr14 +# CHECK-ENCODING: encoding: [0xc3,0x3b,0x0d,0x71] + +vbitclr.d $vr25, $vr11, $vr4 +# CHECK-INST: vbitclr.d $vr25, $vr11, $vr4 +# CHECK-ENCODING: encoding: [0x79,0x91,0x0d,0x71] + +vbitclri.b $vr15, $vr25, 4 +# CHECK-INST: vbitclri.b $vr15, $vr25, 4 +# CHECK-ENCODING: encoding: [0x2f,0x33,0x10,0x73] + +vbitclri.h $vr24, $vr22, 1 +# CHECK-INST: vbitclri.h $vr24, $vr22, 1 +# CHECK-ENCODING: encoding: [0xd8,0x46,0x10,0x73] + +vbitclri.w $vr30, $vr20, 1 +# CHECK-INST: vbitclri.w $vr30, $vr20, 1 +# CHECK-ENCODING: encoding: [0x9e,0x86,0x10,0x73] + +vbitclri.d $vr5, $vr0, 16 +# CHECK-INST: vbitclri.d $vr5, $vr0, 16 +# CHECK-ENCODING: encoding: [0x05,0x40,0x11,0x73] diff --git a/llvm/test/MC/LoongArch/lsx/bitrev.s b/llvm/test/MC/LoongArch/lsx/bitrev.s new file mode 100644 index 0000000000000000000000000000000000000000..852500be5c8be4b9ab528e2cf34fc1e1adf663b8 --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/bitrev.s @@ -0,0 +1,36 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vbitrev.b $vr4, $vr31, $vr10 +# CHECK-INST: vbitrev.b $vr4, $vr31, $vr10 +# CHECK-ENCODING: encoding: [0xe4,0x2b,0x10,0x71] + +vbitrev.h $vr19, $vr19, $vr16 +# CHECK-INST: vbitrev.h $vr19, $vr19, $vr16 +# CHECK-ENCODING: encoding: [0x73,0xc2,0x10,0x71] + +vbitrev.w $vr4, $vr18, $vr7 +# CHECK-INST: vbitrev.w $vr4, $vr18, $vr7 +# CHECK-ENCODING: encoding: [0x44,0x1e,0x11,0x71] + +vbitrev.d $vr17, $vr31, $vr0 +# CHECK-INST: vbitrev.d $vr17, $vr31, $vr0 +# CHECK-ENCODING: encoding: [0xf1,0x83,0x11,0x71] + +vbitrevi.b $vr9, $vr31, 7 +# CHECK-INST: vbitrevi.b $vr9, $vr31, 7 +# CHECK-ENCODING: encoding: [0xe9,0x3f,0x18,0x73] + +vbitrevi.h $vr4, $vr24, 8 +# CHECK-INST: vbitrevi.h $vr4, $vr24, 8 +# CHECK-ENCODING: encoding: [0x04,0x63,0x18,0x73] + +vbitrevi.w $vr17, $vr19, 2 +# CHECK-INST: vbitrevi.w $vr17, $vr19, 2 +# CHECK-ENCODING: encoding: [0x71,0x8a,0x18,0x73] + +vbitrevi.d $vr15, $vr7, 47 +# CHECK-INST: vbitrevi.d $vr15, $vr7, 47 +# CHECK-ENCODING: encoding: [0xef,0xbc,0x19,0x73] diff --git a/llvm/test/MC/LoongArch/lsx/bitsel.s b/llvm/test/MC/LoongArch/lsx/bitsel.s new file mode 100644 index 0000000000000000000000000000000000000000..54cce96470ffaa7c0799ace2de3512a8c0ffaac6 --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/bitsel.s @@ -0,0 +1,8 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vbitsel.v $vr2, $vr28, $vr6, $vr30 +# CHECK-INST: vbitsel.v $vr2, $vr28, $vr6, $vr30 +# CHECK-ENCODING: encoding: [0x82,0x1b,0x1f,0x0d] diff --git a/llvm/test/MC/LoongArch/lsx/bitseli.s b/llvm/test/MC/LoongArch/lsx/bitseli.s new file mode 100644 index 0000000000000000000000000000000000000000..6ca8a67d2de848067010724b01969276f8f73286 --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/bitseli.s @@ -0,0 +1,8 @@ +# RUN: llvm-mc 
--triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vbitseli.b $vr9, $vr0, 110 +# CHECK-INST: vbitseli.b $vr9, $vr0, 110 +# CHECK-ENCODING: encoding: [0x09,0xb8,0xc5,0x73] diff --git a/llvm/test/MC/LoongArch/lsx/bitset.s b/llvm/test/MC/LoongArch/lsx/bitset.s new file mode 100644 index 0000000000000000000000000000000000000000..3553fc07ec85bf4746046a1b383843a4f2473a37 --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/bitset.s @@ -0,0 +1,36 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vbitset.b $vr13, $vr27, $vr14 +# CHECK-INST: vbitset.b $vr13, $vr27, $vr14 +# CHECK-ENCODING: encoding: [0x6d,0x3b,0x0e,0x71] + +vbitset.h $vr24, $vr6, $vr3 +# CHECK-INST: vbitset.h $vr24, $vr6, $vr3 +# CHECK-ENCODING: encoding: [0xd8,0x8c,0x0e,0x71] + +vbitset.w $vr31, $vr0, $vr0 +# CHECK-INST: vbitset.w $vr31, $vr0, $vr0 +# CHECK-ENCODING: encoding: [0x1f,0x00,0x0f,0x71] + +vbitset.d $vr6, $vr15, $vr31 +# CHECK-INST: vbitset.d $vr6, $vr15, $vr31 +# CHECK-ENCODING: encoding: [0xe6,0xfd,0x0f,0x71] + +vbitseti.b $vr4, $vr3, 1 +# CHECK-INST: vbitseti.b $vr4, $vr3, 1 +# CHECK-ENCODING: encoding: [0x64,0x24,0x14,0x73] + +vbitseti.h $vr10, $vr20, 2 +# CHECK-INST: vbitseti.h $vr10, $vr20, 2 +# CHECK-ENCODING: encoding: [0x8a,0x4a,0x14,0x73] + +vbitseti.w $vr14, $vr16, 4 +# CHECK-INST: vbitseti.w $vr14, $vr16, 4 +# CHECK-ENCODING: encoding: [0x0e,0x92,0x14,0x73] + +vbitseti.d $vr10, $vr13, 25 +# CHECK-INST: vbitseti.d $vr10, $vr13, 25 +# CHECK-ENCODING: encoding: [0xaa,0x65,0x15,0x73] diff --git a/llvm/test/MC/LoongArch/lsx/bsll.s b/llvm/test/MC/LoongArch/lsx/bsll.s new file mode 100644 index 0000000000000000000000000000000000000000..24625180fb56cae406b5f9e29fee9c33ff5ed2d3 --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/bsll.s @@ -0,0 +1,8 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vbsll.v $vr21, $vr1, 17 +# CHECK-INST: vbsll.v $vr21, $vr1, 17 +# CHECK-ENCODING: encoding: [0x35,0x44,0x8e,0x72] diff --git a/llvm/test/MC/LoongArch/lsx/bsrl.s b/llvm/test/MC/LoongArch/lsx/bsrl.s new file mode 100644 index 0000000000000000000000000000000000000000..2b9ab996bb5bf037180c253c570a74057ef81c31 --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/bsrl.s @@ -0,0 +1,8 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vbsrl.v $vr14, $vr15, 24 +# CHECK-INST: vbsrl.v $vr14, $vr15, 24 +# CHECK-ENCODING: encoding: [0xee,0xe1,0x8e,0x72] diff --git a/llvm/test/MC/LoongArch/lsx/clo.s b/llvm/test/MC/LoongArch/lsx/clo.s new file mode 100644 index 0000000000000000000000000000000000000000..de876176aad3eaa1ff042373ea1f73ea4d112cc4 --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/clo.s @@ -0,0 +1,20 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc 
--triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vclo.b $vr2, $vr0 +# CHECK-INST: vclo.b $vr2, $vr0 +# CHECK-ENCODING: encoding: [0x02,0x00,0x9c,0x72] + +vclo.h $vr23, $vr31 +# CHECK-INST: vclo.h $vr23, $vr31 +# CHECK-ENCODING: encoding: [0xf7,0x07,0x9c,0x72] + +vclo.w $vr7, $vr28 +# CHECK-INST: vclo.w $vr7, $vr28 +# CHECK-ENCODING: encoding: [0x87,0x0b,0x9c,0x72] + +vclo.d $vr5, $vr11 +# CHECK-INST: vclo.d $vr5, $vr11 +# CHECK-ENCODING: encoding: [0x65,0x0d,0x9c,0x72] diff --git a/llvm/test/MC/LoongArch/lsx/clz.s b/llvm/test/MC/LoongArch/lsx/clz.s new file mode 100644 index 0000000000000000000000000000000000000000..de8c603109f5e39b5ae9310a128ce64ca0dbc2b0 --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/clz.s @@ -0,0 +1,20 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vclz.b $vr22, $vr14 +# CHECK-INST: vclz.b $vr22, $vr14 +# CHECK-ENCODING: encoding: [0xd6,0x11,0x9c,0x72] + +vclz.h $vr16, $vr0 +# CHECK-INST: vclz.h $vr16, $vr0 +# CHECK-ENCODING: encoding: [0x10,0x14,0x9c,0x72] + +vclz.w $vr19, $vr19 +# CHECK-INST: vclz.w $vr19, $vr19 +# CHECK-ENCODING: encoding: [0x73,0x1a,0x9c,0x72] + +vclz.d $vr27, $vr14 +# CHECK-INST: vclz.d $vr27, $vr14 +# CHECK-ENCODING: encoding: [0xdb,0x1d,0x9c,0x72] diff --git a/llvm/test/MC/LoongArch/lsx/div.s b/llvm/test/MC/LoongArch/lsx/div.s new file mode 100644 index 0000000000000000000000000000000000000000..625c2a1b425e1f4fb5c1842df980e05866153a68 --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/div.s @@ -0,0 +1,36 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vdiv.b $vr26, $vr17, $vr24 +# CHECK-INST: vdiv.b $vr26, $vr17, $vr24 +# CHECK-ENCODING: encoding: [0x3a,0x62,0xe0,0x70] + +vdiv.h $vr26, $vr23, $vr21 +# CHECK-INST: vdiv.h $vr26, $vr23, $vr21 +# CHECK-ENCODING: encoding: [0xfa,0xd6,0xe0,0x70] + +vdiv.w $vr1, $vr13, $vr10 +# CHECK-INST: vdiv.w $vr1, $vr13, $vr10 +# CHECK-ENCODING: encoding: [0xa1,0x29,0xe1,0x70] + +vdiv.d $vr4, $vr25, $vr21 +# CHECK-INST: vdiv.d $vr4, $vr25, $vr21 +# CHECK-ENCODING: encoding: [0x24,0xd7,0xe1,0x70] + +vdiv.bu $vr13, $vr13, $vr6 +# CHECK-INST: vdiv.bu $vr13, $vr13, $vr6 +# CHECK-ENCODING: encoding: [0xad,0x19,0xe4,0x70] + +vdiv.hu $vr1, $vr30, $vr5 +# CHECK-INST: vdiv.hu $vr1, $vr30, $vr5 +# CHECK-ENCODING: encoding: [0xc1,0x97,0xe4,0x70] + +vdiv.wu $vr27, $vr31, $vr20 +# CHECK-INST: vdiv.wu $vr27, $vr31, $vr20 +# CHECK-ENCODING: encoding: [0xfb,0x53,0xe5,0x70] + +vdiv.du $vr30, $vr0, $vr5 +# CHECK-INST: vdiv.du $vr30, $vr0, $vr5 +# CHECK-ENCODING: encoding: [0x1e,0x94,0xe5,0x70] diff --git a/llvm/test/MC/LoongArch/lsx/exth.s b/llvm/test/MC/LoongArch/lsx/exth.s new file mode 100644 index 0000000000000000000000000000000000000000..3a64bd927376305ac0b194501942de77ea411cba --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/exth.s @@ -0,0 +1,36 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vexth.h.b $vr9, $vr6 +# CHECK-INST: vexth.h.b $vr9, $vr6 +# 
CHECK-ENCODING: encoding: [0xc9,0xe0,0x9e,0x72] + +vexth.w.h $vr14, $vr19 +# CHECK-INST: vexth.w.h $vr14, $vr19 +# CHECK-ENCODING: encoding: [0x6e,0xe6,0x9e,0x72] + +vexth.d.w $vr1, $vr20 +# CHECK-INST: vexth.d.w $vr1, $vr20 +# CHECK-ENCODING: encoding: [0x81,0xea,0x9e,0x72] + +vexth.q.d $vr20, $vr10 +# CHECK-INST: vexth.q.d $vr20, $vr10 +# CHECK-ENCODING: encoding: [0x54,0xed,0x9e,0x72] + +vexth.hu.bu $vr5, $vr1 +# CHECK-INST: vexth.hu.bu $vr5, $vr1 +# CHECK-ENCODING: encoding: [0x25,0xf0,0x9e,0x72] + +vexth.wu.hu $vr17, $vr26 +# CHECK-INST: vexth.wu.hu $vr17, $vr26 +# CHECK-ENCODING: encoding: [0x51,0xf7,0x9e,0x72] + +vexth.du.wu $vr2, $vr7 +# CHECK-INST: vexth.du.wu $vr2, $vr7 +# CHECK-ENCODING: encoding: [0xe2,0xf8,0x9e,0x72] + +vexth.qu.du $vr19, $vr11 +# CHECK-INST: vexth.qu.du $vr19, $vr11 +# CHECK-ENCODING: encoding: [0x73,0xfd,0x9e,0x72] diff --git a/llvm/test/MC/LoongArch/lsx/extl.s b/llvm/test/MC/LoongArch/lsx/extl.s new file mode 100644 index 0000000000000000000000000000000000000000..cce80e2f1fe3c0596717a65446bb959bf58ee74e --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/extl.s @@ -0,0 +1,12 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vextl.q.d $vr14, $vr20 +# CHECK-INST: vextl.q.d $vr14, $vr20 +# CHECK-ENCODING: encoding: [0x8e,0x02,0x09,0x73] + +vextl.qu.du $vr26, $vr26 +# CHECK-INST: vextl.qu.du $vr26, $vr26 +# CHECK-ENCODING: encoding: [0x5a,0x03,0x0d,0x73] diff --git a/llvm/test/MC/LoongArch/lsx/extrins.s b/llvm/test/MC/LoongArch/lsx/extrins.s new file mode 100644 index 0000000000000000000000000000000000000000..ef279b7be50550241e90f3e2419b78d83848477e --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/extrins.s @@ -0,0 +1,20 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vextrins.b $vr14, $vr19, 213 +# CHECK-INST: vextrins.b $vr14, $vr19, 213 +# CHECK-ENCODING: encoding: [0x6e,0x56,0x8f,0x73] + +vextrins.h $vr1, $vr6, 170 +# CHECK-INST: vextrins.h $vr1, $vr6, 170 +# CHECK-ENCODING: encoding: [0xc1,0xa8,0x8a,0x73] + +vextrins.w $vr9, $vr4, 189 +# CHECK-INST: vextrins.w $vr9, $vr4, 189 +# CHECK-ENCODING: encoding: [0x89,0xf4,0x86,0x73] + +vextrins.d $vr20, $vr25, 121 +# CHECK-INST: vextrins.d $vr20, $vr25, 121 +# CHECK-ENCODING: encoding: [0x34,0xe7,0x81,0x73] diff --git a/llvm/test/MC/LoongArch/lsx/fadd.s b/llvm/test/MC/LoongArch/lsx/fadd.s new file mode 100644 index 0000000000000000000000000000000000000000..55e67cf9096dc6d4f9606ab1808d206ffda46ec9 --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/fadd.s @@ -0,0 +1,12 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vfadd.s $vr10, $vr2, $vr15 +# CHECK-INST: vfadd.s $vr10, $vr2, $vr15 +# CHECK-ENCODING: encoding: [0x4a,0xbc,0x30,0x71] + +vfadd.d $vr16, $vr1, $vr22 +# CHECK-INST: vfadd.d $vr16, $vr1, $vr22 +# CHECK-ENCODING: encoding: [0x30,0x58,0x31,0x71] diff --git a/llvm/test/MC/LoongArch/lsx/fclass.s b/llvm/test/MC/LoongArch/lsx/fclass.s new file mode 100644 index 
0000000000000000000000000000000000000000..4f4ad1ef2a225902be5f6e0f31f7da36b6793c81 --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/fclass.s @@ -0,0 +1,12 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vfclass.s $vr24, $vr26 +# CHECK-INST: vfclass.s $vr24, $vr26 +# CHECK-ENCODING: encoding: [0x58,0xd7,0x9c,0x72] + +vfclass.d $vr8, $vr17 +# CHECK-INST: vfclass.d $vr8, $vr17 +# CHECK-ENCODING: encoding: [0x28,0xda,0x9c,0x72] diff --git a/llvm/test/MC/LoongArch/lsx/fcmp.s b/llvm/test/MC/LoongArch/lsx/fcmp.s new file mode 100644 index 0000000000000000000000000000000000000000..c7ea5bd0441061b4bd58a2b5cfce8c08c644f145 --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/fcmp.s @@ -0,0 +1,180 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vfcmp.caf.s $vr25, $vr5, $vr4 +# CHECK-INST: vfcmp.caf.s $vr25, $vr5, $vr4 +# CHECK-ENCODING: encoding: [0xb9,0x10,0x50,0x0c] + +vfcmp.caf.d $vr14, $vr16, $vr23 +# CHECK-INST: vfcmp.caf.d $vr14, $vr16, $vr23 +# CHECK-ENCODING: encoding: [0x0e,0x5e,0x60,0x0c] + +vfcmp.cun.s $vr22, $vr15, $vr4 +# CHECK-INST: vfcmp.cun.s $vr22, $vr15, $vr4 +# CHECK-ENCODING: encoding: [0xf6,0x11,0x54,0x0c] + +vfcmp.cun.d $vr28, $vr27, $vr9 +# CHECK-INST: vfcmp.cun.d $vr28, $vr27, $vr9 +# CHECK-ENCODING: encoding: [0x7c,0x27,0x64,0x0c] + +vfcmp.ceq.s $vr20, $vr24, $vr29 +# CHECK-INST: vfcmp.ceq.s $vr20, $vr24, $vr29 +# CHECK-ENCODING: encoding: [0x14,0x77,0x52,0x0c] + +vfcmp.ceq.d $vr15, $vr23, $vr17 +# CHECK-INST: vfcmp.ceq.d $vr15, $vr23, $vr17 +# CHECK-ENCODING: encoding: [0xef,0x46,0x62,0x0c] + +vfcmp.cueq.s $vr26, $vr31, $vr19 +# CHECK-INST: vfcmp.cueq.s $vr26, $vr31, $vr19 +# CHECK-ENCODING: encoding: [0xfa,0x4f,0x56,0x0c] + +vfcmp.cueq.d $vr27, $vr10, $vr16 +# CHECK-INST: vfcmp.cueq.d $vr27, $vr10, $vr16 +# CHECK-ENCODING: encoding: [0x5b,0x41,0x66,0x0c] + +vfcmp.clt.s $vr6, $vr27, $vr2 +# CHECK-INST: vfcmp.clt.s $vr6, $vr27, $vr2 +# CHECK-ENCODING: encoding: [0x66,0x0b,0x51,0x0c] + +vfcmp.clt.d $vr11, $vr8, $vr6 +# CHECK-INST: vfcmp.clt.d $vr11, $vr8, $vr6 +# CHECK-ENCODING: encoding: [0x0b,0x19,0x61,0x0c] + +vfcmp.cult.s $vr1, $vr17, $vr2 +# CHECK-INST: vfcmp.cult.s $vr1, $vr17, $vr2 +# CHECK-ENCODING: encoding: [0x21,0x0a,0x55,0x0c] + +vfcmp.cult.d $vr11, $vr20, $vr7 +# CHECK-INST: vfcmp.cult.d $vr11, $vr20, $vr7 +# CHECK-ENCODING: encoding: [0x8b,0x1e,0x65,0x0c] + +vfcmp.cle.s $vr10, $vr20, $vr23 +# CHECK-INST: vfcmp.cle.s $vr10, $vr20, $vr23 +# CHECK-ENCODING: encoding: [0x8a,0x5e,0x53,0x0c] + +vfcmp.cle.d $vr1, $vr8, $vr18 +# CHECK-INST: vfcmp.cle.d $vr1, $vr8, $vr18 +# CHECK-ENCODING: encoding: [0x01,0x49,0x63,0x0c] + +vfcmp.cule.s $vr6, $vr15, $vr11 +# CHECK-INST: vfcmp.cule.s $vr6, $vr15, $vr11 +# CHECK-ENCODING: encoding: [0xe6,0x2d,0x57,0x0c] + +vfcmp.cule.d $vr11, $vr28, $vr30 +# CHECK-INST: vfcmp.cule.d $vr11, $vr28, $vr30 +# CHECK-ENCODING: encoding: [0x8b,0x7b,0x67,0x0c] + +vfcmp.cne.s $vr29, $vr28, $vr11 +# CHECK-INST: vfcmp.cne.s $vr29, $vr28, $vr11 +# CHECK-ENCODING: encoding: [0x9d,0x2f,0x58,0x0c] + +vfcmp.cne.d $vr20, $vr5, $vr7 +# CHECK-INST: vfcmp.cne.d $vr20, $vr5, $vr7 +# CHECK-ENCODING: encoding: [0xb4,0x1c,0x68,0x0c] + +vfcmp.cor.s $vr20, $vr17, 
$vr12 +# CHECK-INST: vfcmp.cor.s $vr20, $vr17, $vr12 +# CHECK-ENCODING: encoding: [0x34,0x32,0x5a,0x0c] + +vfcmp.cor.d $vr25, $vr10, $vr16 +# CHECK-INST: vfcmp.cor.d $vr25, $vr10, $vr16 +# CHECK-ENCODING: encoding: [0x59,0x41,0x6a,0x0c] + +vfcmp.cune.s $vr26, $vr7, $vr8 +# CHECK-INST: vfcmp.cune.s $vr26, $vr7, $vr8 +# CHECK-ENCODING: encoding: [0xfa,0x20,0x5c,0x0c] + +vfcmp.cune.d $vr13, $vr31, $vr3 +# CHECK-INST: vfcmp.cune.d $vr13, $vr31, $vr3 +# CHECK-ENCODING: encoding: [0xed,0x0f,0x6c,0x0c] + +vfcmp.saf.s $vr26, $vr25, $vr5 +# CHECK-INST: vfcmp.saf.s $vr26, $vr25, $vr5 +# CHECK-ENCODING: encoding: [0x3a,0x97,0x50,0x0c] + +vfcmp.saf.d $vr5, $vr29, $vr21 +# CHECK-INST: vfcmp.saf.d $vr5, $vr29, $vr21 +# CHECK-ENCODING: encoding: [0xa5,0xd7,0x60,0x0c] + +vfcmp.sun.s $vr2, $vr2, $vr11 +# CHECK-INST: vfcmp.sun.s $vr2, $vr2, $vr11 +# CHECK-ENCODING: encoding: [0x42,0xac,0x54,0x0c] + +vfcmp.sun.d $vr30, $vr23, $vr23 +# CHECK-INST: vfcmp.sun.d $vr30, $vr23, $vr23 +# CHECK-ENCODING: encoding: [0xfe,0xde,0x64,0x0c] + +vfcmp.seq.s $vr4, $vr24, $vr31 +# CHECK-INST: vfcmp.seq.s $vr4, $vr24, $vr31 +# CHECK-ENCODING: encoding: [0x04,0xff,0x52,0x0c] + +vfcmp.seq.d $vr28, $vr28, $vr5 +# CHECK-INST: vfcmp.seq.d $vr28, $vr28, $vr5 +# CHECK-ENCODING: encoding: [0x9c,0x97,0x62,0x0c] + +vfcmp.sueq.s $vr2, $vr25, $vr29 +# CHECK-INST: vfcmp.sueq.s $vr2, $vr25, $vr29 +# CHECK-ENCODING: encoding: [0x22,0xf7,0x56,0x0c] + +vfcmp.sueq.d $vr26, $vr16, $vr0 +# CHECK-INST: vfcmp.sueq.d $vr26, $vr16, $vr0 +# CHECK-ENCODING: encoding: [0x1a,0x82,0x66,0x0c] + +vfcmp.slt.s $vr8, $vr22, $vr5 +# CHECK-INST: vfcmp.slt.s $vr8, $vr22, $vr5 +# CHECK-ENCODING: encoding: [0xc8,0x96,0x51,0x0c] + +vfcmp.slt.d $vr13, $vr8, $vr22 +# CHECK-INST: vfcmp.slt.d $vr13, $vr8, $vr22 +# CHECK-ENCODING: encoding: [0x0d,0xd9,0x61,0x0c] + +vfcmp.sult.s $vr16, $vr4, $vr21 +# CHECK-INST: vfcmp.sult.s $vr16, $vr4, $vr21 +# CHECK-ENCODING: encoding: [0x90,0xd4,0x55,0x0c] + +vfcmp.sult.d $vr28, $vr14, $vr4 +# CHECK-INST: vfcmp.sult.d $vr28, $vr14, $vr4 +# CHECK-ENCODING: encoding: [0xdc,0x91,0x65,0x0c] + +vfcmp.sle.s $vr13, $vr21, $vr8 +# CHECK-INST: vfcmp.sle.s $vr13, $vr21, $vr8 +# CHECK-ENCODING: encoding: [0xad,0xa2,0x53,0x0c] + +vfcmp.sle.d $vr3, $vr18, $vr9 +# CHECK-INST: vfcmp.sle.d $vr3, $vr18, $vr9 +# CHECK-ENCODING: encoding: [0x43,0xa6,0x63,0x0c] + +vfcmp.sule.s $vr8, $vr23, $vr19 +# CHECK-INST: vfcmp.sule.s $vr8, $vr23, $vr19 +# CHECK-ENCODING: encoding: [0xe8,0xce,0x57,0x0c] + +vfcmp.sule.d $vr22, $vr17, $vr11 +# CHECK-INST: vfcmp.sule.d $vr22, $vr17, $vr11 +# CHECK-ENCODING: encoding: [0x36,0xae,0x67,0x0c] + +vfcmp.sne.s $vr17, $vr25, $vr6 +# CHECK-INST: vfcmp.sne.s $vr17, $vr25, $vr6 +# CHECK-ENCODING: encoding: [0x31,0x9b,0x58,0x0c] + +vfcmp.sne.d $vr3, $vr1, $vr28 +# CHECK-INST: vfcmp.sne.d $vr3, $vr1, $vr28 +# CHECK-ENCODING: encoding: [0x23,0xf0,0x68,0x0c] + +vfcmp.sor.s $vr31, $vr20, $vr11 +# CHECK-INST: vfcmp.sor.s $vr31, $vr20, $vr11 +# CHECK-ENCODING: encoding: [0x9f,0xae,0x5a,0x0c] + +vfcmp.sor.d $vr18, $vr4, $vr15 +# CHECK-INST: vfcmp.sor.d $vr18, $vr4, $vr15 +# CHECK-ENCODING: encoding: [0x92,0xbc,0x6a,0x0c] + +vfcmp.sune.s $vr16, $vr17, $vr15 +# CHECK-INST: vfcmp.sune.s $vr16, $vr17, $vr15 +# CHECK-ENCODING: encoding: [0x30,0xbe,0x5c,0x0c] + +vfcmp.sune.d $vr23, $vr1, $vr19 +# CHECK-INST: vfcmp.sune.d $vr23, $vr1, $vr19 +# CHECK-ENCODING: encoding: [0x37,0xcc,0x6c,0x0c] diff --git a/llvm/test/MC/LoongArch/lsx/fcvt.s b/llvm/test/MC/LoongArch/lsx/fcvt.s new file mode 100644 index 
0000000000000000000000000000000000000000..30689a79fdfe4a888888f4e96e79e6cfe90c5e3c --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/fcvt.s @@ -0,0 +1,12 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vfcvt.h.s $vr30, $vr1, $vr30 +# CHECK-INST: vfcvt.h.s $vr30, $vr1, $vr30 +# CHECK-ENCODING: encoding: [0x3e,0x78,0x46,0x71] + +vfcvt.s.d $vr27, $vr11, $vr4 +# CHECK-INST: vfcvt.s.d $vr27, $vr11, $vr4 +# CHECK-ENCODING: encoding: [0x7b,0x91,0x46,0x71] diff --git a/llvm/test/MC/LoongArch/lsx/fcvth.s b/llvm/test/MC/LoongArch/lsx/fcvth.s new file mode 100644 index 0000000000000000000000000000000000000000..09d2dad6526b2c91a52e67bfee07c78f11544d38 --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/fcvth.s @@ -0,0 +1,12 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vfcvth.s.h $vr7, $vr30 +# CHECK-INST: vfcvth.s.h $vr7, $vr30 +# CHECK-ENCODING: encoding: [0xc7,0xef,0x9d,0x72] + +vfcvth.d.s $vr15, $vr14 +# CHECK-INST: vfcvth.d.s $vr15, $vr14 +# CHECK-ENCODING: encoding: [0xcf,0xf5,0x9d,0x72] diff --git a/llvm/test/MC/LoongArch/lsx/fcvtl.s b/llvm/test/MC/LoongArch/lsx/fcvtl.s new file mode 100644 index 0000000000000000000000000000000000000000..bb45be02c61f84fba8801e9cf45db3f461a62ec4 --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/fcvtl.s @@ -0,0 +1,12 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vfcvtl.s.h $vr26, $vr23 +# CHECK-INST: vfcvtl.s.h $vr26, $vr23 +# CHECK-ENCODING: encoding: [0xfa,0xea,0x9d,0x72] + +vfcvtl.d.s $vr3, $vr7 +# CHECK-INST: vfcvtl.d.s $vr3, $vr7 +# CHECK-ENCODING: encoding: [0xe3,0xf0,0x9d,0x72] diff --git a/llvm/test/MC/LoongArch/lsx/fdiv.s b/llvm/test/MC/LoongArch/lsx/fdiv.s new file mode 100644 index 0000000000000000000000000000000000000000..cb3b536c63a4181754656784147223386f00f40e --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/fdiv.s @@ -0,0 +1,12 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vfdiv.s $vr27, $vr12, $vr26 +# CHECK-INST: vfdiv.s $vr27, $vr12, $vr26 +# CHECK-ENCODING: encoding: [0x9b,0xe9,0x3a,0x71] + +vfdiv.d $vr3, $vr1, $vr7 +# CHECK-INST: vfdiv.d $vr3, $vr1, $vr7 +# CHECK-ENCODING: encoding: [0x23,0x1c,0x3b,0x71] diff --git a/llvm/test/MC/LoongArch/lsx/ffint.s b/llvm/test/MC/LoongArch/lsx/ffint.s new file mode 100644 index 0000000000000000000000000000000000000000..e6340d161282cda8fe73a4b3b17def23f64acd12 --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/ffint.s @@ -0,0 +1,32 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vffint.s.w $vr3, $vr0 +# CHECK-INST: vffint.s.w $vr3, $vr0 +# CHECK-ENCODING: encoding: [0x03,0x00,0x9e,0x72] + 
+vffint.d.l $vr2, $vr15 +# CHECK-INST: vffint.d.l $vr2, $vr15 +# CHECK-ENCODING: encoding: [0xe2,0x09,0x9e,0x72] + +vffint.s.wu $vr5, $vr9 +# CHECK-INST: vffint.s.wu $vr5, $vr9 +# CHECK-ENCODING: encoding: [0x25,0x05,0x9e,0x72] + +vffint.d.lu $vr6, $vr13 +# CHECK-INST: vffint.d.lu $vr6, $vr13 +# CHECK-ENCODING: encoding: [0xa6,0x0d,0x9e,0x72] + +vffintl.d.w $vr26, $vr1 +# CHECK-INST: vffintl.d.w $vr26, $vr1 +# CHECK-ENCODING: encoding: [0x3a,0x10,0x9e,0x72] + +vffinth.d.w $vr18, $vr21 +# CHECK-INST: vffinth.d.w $vr18, $vr21 +# CHECK-ENCODING: encoding: [0xb2,0x16,0x9e,0x72] + +vffint.s.l $vr29, $vr12, $vr7 +# CHECK-INST: vffint.s.l $vr29, $vr12, $vr7 +# CHECK-ENCODING: encoding: [0x9d,0x1d,0x48,0x71] diff --git a/llvm/test/MC/LoongArch/lsx/flogb.s b/llvm/test/MC/LoongArch/lsx/flogb.s new file mode 100644 index 0000000000000000000000000000000000000000..3c788b96fa379e81b76a45a7237ea122b74c4231 --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/flogb.s @@ -0,0 +1,12 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vflogb.s $vr12, $vr20 +# CHECK-INST: vflogb.s $vr12, $vr20 +# CHECK-ENCODING: encoding: [0x8c,0xc6,0x9c,0x72] + +vflogb.d $vr3, $vr29 +# CHECK-INST: vflogb.d $vr3, $vr29 +# CHECK-ENCODING: encoding: [0xa3,0xcb,0x9c,0x72] diff --git a/llvm/test/MC/LoongArch/lsx/fmadd.s b/llvm/test/MC/LoongArch/lsx/fmadd.s new file mode 100644 index 0000000000000000000000000000000000000000..a31e12120ff18f2d72eded3e781f249dd76487d5 --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/fmadd.s @@ -0,0 +1,12 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vfmadd.s $vr6, $vr7, $vr13, $vr24 +# CHECK-INST: vfmadd.s $vr6, $vr7, $vr13, $vr24 +# CHECK-ENCODING: encoding: [0xe6,0x34,0x1c,0x09] + +vfmadd.d $vr3, $vr28, $vr2, $vr21 +# CHECK-INST: vfmadd.d $vr3, $vr28, $vr2, $vr21 +# CHECK-ENCODING: encoding: [0x83,0x8b,0x2a,0x09] diff --git a/llvm/test/MC/LoongArch/lsx/fmax.s b/llvm/test/MC/LoongArch/lsx/fmax.s new file mode 100644 index 0000000000000000000000000000000000000000..8fcd32aeb55404e1672e0ce44213457bd1bc52fb --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/fmax.s @@ -0,0 +1,12 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vfmax.s $vr19, $vr25, $vr16 +# CHECK-INST: vfmax.s $vr19, $vr25, $vr16 +# CHECK-ENCODING: encoding: [0x33,0xc3,0x3c,0x71] + +vfmax.d $vr19, $vr21, $vr12 +# CHECK-INST: vfmax.d $vr19, $vr21, $vr12 +# CHECK-ENCODING: encoding: [0xb3,0x32,0x3d,0x71] diff --git a/llvm/test/MC/LoongArch/lsx/fmaxa.s b/llvm/test/MC/LoongArch/lsx/fmaxa.s new file mode 100644 index 0000000000000000000000000000000000000000..3e35c329d126c9488ce13138bc9cbb2fac90061e --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/fmaxa.s @@ -0,0 +1,12 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vfmaxa.s $vr2, $vr8, $vr1 +# 
CHECK-INST: vfmaxa.s $vr2, $vr8, $vr1 +# CHECK-ENCODING: encoding: [0x02,0x85,0x40,0x71] + +vfmaxa.d $vr1, $vr8, $vr28 +# CHECK-INST: vfmaxa.d $vr1, $vr8, $vr28 +# CHECK-ENCODING: encoding: [0x01,0x71,0x41,0x71] diff --git a/llvm/test/MC/LoongArch/lsx/fmin.s b/llvm/test/MC/LoongArch/lsx/fmin.s new file mode 100644 index 0000000000000000000000000000000000000000..b398fe70d27f431dfe879163955d0a5cee83455a --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/fmin.s @@ -0,0 +1,12 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vfmin.s $vr18, $vr17, $vr1 +# CHECK-INST: vfmin.s $vr18, $vr17, $vr1 +# CHECK-ENCODING: encoding: [0x32,0x86,0x3e,0x71] + +vfmin.d $vr30, $vr12, $vr5 +# CHECK-INST: vfmin.d $vr30, $vr12, $vr5 +# CHECK-ENCODING: encoding: [0x9e,0x15,0x3f,0x71] diff --git a/llvm/test/MC/LoongArch/lsx/fmina.s b/llvm/test/MC/LoongArch/lsx/fmina.s new file mode 100644 index 0000000000000000000000000000000000000000..d206819aef8c00c0364d1fd29c9198b50da03261 --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/fmina.s @@ -0,0 +1,12 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vfmina.s $vr20, $vr27, $vr20 +# CHECK-INST: vfmina.s $vr20, $vr27, $vr20 +# CHECK-ENCODING: encoding: [0x74,0xd3,0x42,0x71] + +vfmina.d $vr1, $vr26, $vr22 +# CHECK-INST: vfmina.d $vr1, $vr26, $vr22 +# CHECK-ENCODING: encoding: [0x41,0x5b,0x43,0x71] diff --git a/llvm/test/MC/LoongArch/lsx/fmsub.s b/llvm/test/MC/LoongArch/lsx/fmsub.s new file mode 100644 index 0000000000000000000000000000000000000000..84ce341de0399655711b3b32342fb23fe7a14315 --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/fmsub.s @@ -0,0 +1,12 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vfmsub.s $vr25, $vr30, $vr4, $vr13 +# CHECK-INST: vfmsub.s $vr25, $vr30, $vr4, $vr13 +# CHECK-ENCODING: encoding: [0xd9,0x93,0x56,0x09] + +vfmsub.d $vr3, $vr1, $vr0, $vr19 +# CHECK-INST: vfmsub.d $vr3, $vr1, $vr0, $vr19 +# CHECK-ENCODING: encoding: [0x23,0x80,0x69,0x09] diff --git a/llvm/test/MC/LoongArch/lsx/fmul.s b/llvm/test/MC/LoongArch/lsx/fmul.s new file mode 100644 index 0000000000000000000000000000000000000000..a409a6a4020d64c71857788d66d3415b06ede960 --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/fmul.s @@ -0,0 +1,12 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vfmul.s $vr16, $vr8, $vr17 +# CHECK-INST: vfmul.s $vr16, $vr8, $vr17 +# CHECK-ENCODING: encoding: [0x10,0xc5,0x38,0x71] + +vfmul.d $vr3, $vr6, $vr1 +# CHECK-INST: vfmul.d $vr3, $vr6, $vr1 +# CHECK-ENCODING: encoding: [0xc3,0x04,0x39,0x71] diff --git a/llvm/test/MC/LoongArch/lsx/fnmadd.s b/llvm/test/MC/LoongArch/lsx/fnmadd.s new file mode 100644 index 0000000000000000000000000000000000000000..1887d2688b6daba9dbce397d396bb94e902f13f1 --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/fnmadd.s @@ -0,0 +1,12 
@@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vfnmadd.s $vr26, $vr26, $vr13, $vr9 +# CHECK-INST: vfnmadd.s $vr26, $vr26, $vr13, $vr9 +# CHECK-ENCODING: encoding: [0x5a,0xb7,0x94,0x09] + +vfnmadd.d $vr12, $vr27, $vr31, $vr5 +# CHECK-INST: vfnmadd.d $vr12, $vr27, $vr31, $vr5 +# CHECK-ENCODING: encoding: [0x6c,0xff,0xa2,0x09] diff --git a/llvm/test/MC/LoongArch/lsx/fnmsub.s b/llvm/test/MC/LoongArch/lsx/fnmsub.s new file mode 100644 index 0000000000000000000000000000000000000000..e1135d1b29681d09013bf03e688a5bc898f81150 --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/fnmsub.s @@ -0,0 +1,12 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vfnmsub.s $vr2, $vr21, $vr9, $vr2 +# CHECK-INST: vfnmsub.s $vr2, $vr21, $vr9, $vr2 +# CHECK-ENCODING: encoding: [0xa2,0x26,0xd1,0x09] + +vfnmsub.d $vr4, $vr12, $vr27, $vr19 +# CHECK-INST: vfnmsub.d $vr4, $vr12, $vr27, $vr19 +# CHECK-ENCODING: encoding: [0x84,0xed,0xe9,0x09] diff --git a/llvm/test/MC/LoongArch/lsx/frecip.s b/llvm/test/MC/LoongArch/lsx/frecip.s new file mode 100644 index 0000000000000000000000000000000000000000..d8c8278d16675e3dbb6d3e271cfa695ad2da5d1f --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/frecip.s @@ -0,0 +1,12 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vfrecip.s $vr29, $vr14 +# CHECK-INST: vfrecip.s $vr29, $vr14 +# CHECK-ENCODING: encoding: [0xdd,0xf5,0x9c,0x72] + +vfrecip.d $vr24, $vr9 +# CHECK-INST: vfrecip.d $vr24, $vr9 +# CHECK-ENCODING: encoding: [0x38,0xf9,0x9c,0x72] diff --git a/llvm/test/MC/LoongArch/lsx/frint.s b/llvm/test/MC/LoongArch/lsx/frint.s new file mode 100644 index 0000000000000000000000000000000000000000..53a43a4d3ecf2b5e1d9755fe8e2681fb069e7859 --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/frint.s @@ -0,0 +1,44 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vfrintrne.s $vr31, $vr2 +# CHECK-INST: vfrintrne.s $vr31, $vr2 +# CHECK-ENCODING: encoding: [0x5f,0x74,0x9d,0x72] + +vfrintrne.d $vr1, $vr30 +# CHECK-INST: vfrintrne.d $vr1, $vr30 +# CHECK-ENCODING: encoding: [0xc1,0x7b,0x9d,0x72] + +vfrintrz.s $vr16, $vr17 +# CHECK-INST: vfrintrz.s $vr16, $vr17 +# CHECK-ENCODING: encoding: [0x30,0x66,0x9d,0x72] + +vfrintrz.d $vr1, $vr31 +# CHECK-INST: vfrintrz.d $vr1, $vr31 +# CHECK-ENCODING: encoding: [0xe1,0x6b,0x9d,0x72] + +vfrintrp.s $vr11, $vr2 +# CHECK-INST: vfrintrp.s $vr11, $vr2 +# CHECK-ENCODING: encoding: [0x4b,0x54,0x9d,0x72] + +vfrintrp.d $vr30, $vr16 +# CHECK-INST: vfrintrp.d $vr30, $vr16 +# CHECK-ENCODING: encoding: [0x1e,0x5a,0x9d,0x72] + +vfrintrm.s $vr25, $vr23 +# CHECK-INST: vfrintrm.s $vr25, $vr23 +# CHECK-ENCODING: encoding: [0xf9,0x46,0x9d,0x72] + +vfrintrm.d $vr19, $vr11 +# CHECK-INST: vfrintrm.d $vr19, $vr11 +# CHECK-ENCODING: encoding: [0x73,0x49,0x9d,0x72] + +vfrint.s $vr22, $vr6 
+# CHECK-INST: vfrint.s $vr22, $vr6 +# CHECK-ENCODING: encoding: [0xd6,0x34,0x9d,0x72] + +vfrint.d $vr26, $vr9 +# CHECK-INST: vfrint.d $vr26, $vr9 +# CHECK-ENCODING: encoding: [0x3a,0x39,0x9d,0x72] diff --git a/llvm/test/MC/LoongArch/lsx/frsqrt.s b/llvm/test/MC/LoongArch/lsx/frsqrt.s new file mode 100644 index 0000000000000000000000000000000000000000..68b0cc091b8ae254fc929b61647a2694bfe445f5 --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/frsqrt.s @@ -0,0 +1,12 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vfrsqrt.s $vr19, $vr30 +# CHECK-INST: vfrsqrt.s $vr19, $vr30 +# CHECK-ENCODING: encoding: [0xd3,0x07,0x9d,0x72] + +vfrsqrt.d $vr1, $vr0 +# CHECK-INST: vfrsqrt.d $vr1, $vr0 +# CHECK-ENCODING: encoding: [0x01,0x08,0x9d,0x72] diff --git a/llvm/test/MC/LoongArch/lsx/frstp.s b/llvm/test/MC/LoongArch/lsx/frstp.s new file mode 100644 index 0000000000000000000000000000000000000000..86bb31a1d39d40499a4526ad68e83baf621edb17 --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/frstp.s @@ -0,0 +1,20 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vfrstp.b $vr30, $vr25, $vr1 +# CHECK-INST: vfrstp.b $vr30, $vr25, $vr1 +# CHECK-ENCODING: encoding: [0x3e,0x07,0x2b,0x71] + +vfrstp.h $vr22, $vr26, $vr21 +# CHECK-INST: vfrstp.h $vr22, $vr26, $vr21 +# CHECK-ENCODING: encoding: [0x56,0xd7,0x2b,0x71] + +vfrstpi.b $vr12, $vr8, 28 +# CHECK-INST: vfrstpi.b $vr12, $vr8, 28 +# CHECK-ENCODING: encoding: [0x0c,0x71,0x9a,0x72] + +vfrstpi.h $vr5, $vr28, 29 +# CHECK-INST: vfrstpi.h $vr5, $vr28, 29 +# CHECK-ENCODING: encoding: [0x85,0xf7,0x9a,0x72] diff --git a/llvm/test/MC/LoongArch/lsx/fsqrt.s b/llvm/test/MC/LoongArch/lsx/fsqrt.s new file mode 100644 index 0000000000000000000000000000000000000000..a5df2416dfc3d06ed95e1cd842fae478cfb2556f --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/fsqrt.s @@ -0,0 +1,12 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vfsqrt.s $vr0, $vr3 +# CHECK-INST: vfsqrt.s $vr0, $vr3 +# CHECK-ENCODING: encoding: [0x60,0xe4,0x9c,0x72] + +vfsqrt.d $vr26, $vr9 +# CHECK-INST: vfsqrt.d $vr26, $vr9 +# CHECK-ENCODING: encoding: [0x3a,0xe9,0x9c,0x72] diff --git a/llvm/test/MC/LoongArch/lsx/fsub.s b/llvm/test/MC/LoongArch/lsx/fsub.s new file mode 100644 index 0000000000000000000000000000000000000000..30c36363129518c1ba87f57d29f5262ae5d6af7f --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/fsub.s @@ -0,0 +1,12 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vfsub.s $vr4, $vr9, $vr12 +# CHECK-INST: vfsub.s $vr4, $vr9, $vr12 +# CHECK-ENCODING: encoding: [0x24,0xb1,0x32,0x71] + +vfsub.d $vr12, $vr28, $vr27 +# CHECK-INST: vfsub.d $vr12, $vr28, $vr27 +# CHECK-ENCODING: encoding: [0x8c,0x6f,0x33,0x71] diff --git a/llvm/test/MC/LoongArch/lsx/ftint.s b/llvm/test/MC/LoongArch/lsx/ftint.s new file mode 
100644 index 0000000000000000000000000000000000000000..9a50fddf81e58b06245fcadc1479579347f57d9c --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/ftint.s @@ -0,0 +1,120 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vftintrne.w.s $vr25, $vr28 +# CHECK-INST: vftintrne.w.s $vr25, $vr28 +# CHECK-ENCODING: encoding: [0x99,0x53,0x9e,0x72] + +vftintrne.l.d $vr26, $vr27 +# CHECK-INST: vftintrne.l.d $vr26, $vr27 +# CHECK-ENCODING: encoding: [0x7a,0x57,0x9e,0x72] + +vftintrz.w.s $vr24, $vr29 +# CHECK-INST: vftintrz.w.s $vr24, $vr29 +# CHECK-ENCODING: encoding: [0xb8,0x4b,0x9e,0x72] + +vftintrz.l.d $vr17, $vr12 +# CHECK-INST: vftintrz.l.d $vr17, $vr12 +# CHECK-ENCODING: encoding: [0x91,0x4d,0x9e,0x72] + +vftintrp.w.s $vr1, $vr6 +# CHECK-INST: vftintrp.w.s $vr1, $vr6 +# CHECK-ENCODING: encoding: [0xc1,0x40,0x9e,0x72] + +vftintrp.l.d $vr8, $vr26 +# CHECK-INST: vftintrp.l.d $vr8, $vr26 +# CHECK-ENCODING: encoding: [0x48,0x47,0x9e,0x72] + +vftintrm.w.s $vr4, $vr30 +# CHECK-INST: vftintrm.w.s $vr4, $vr30 +# CHECK-ENCODING: encoding: [0xc4,0x3b,0x9e,0x72] + +vftintrm.l.d $vr18, $vr0 +# CHECK-INST: vftintrm.l.d $vr18, $vr0 +# CHECK-ENCODING: encoding: [0x12,0x3c,0x9e,0x72] + +vftint.w.s $vr0, $vr27 +# CHECK-INST: vftint.w.s $vr0, $vr27 +# CHECK-ENCODING: encoding: [0x60,0x33,0x9e,0x72] + +vftint.l.d $vr21, $vr22 +# CHECK-INST: vftint.l.d $vr21, $vr22 +# CHECK-ENCODING: encoding: [0xd5,0x36,0x9e,0x72] + +vftintrz.wu.s $vr8, $vr3 +# CHECK-INST: vftintrz.wu.s $vr8, $vr3 +# CHECK-ENCODING: encoding: [0x68,0x70,0x9e,0x72] + +vftintrz.lu.d $vr25, $vr9 +# CHECK-INST: vftintrz.lu.d $vr25, $vr9 +# CHECK-ENCODING: encoding: [0x39,0x75,0x9e,0x72] + +vftint.wu.s $vr8, $vr8 +# CHECK-INST: vftint.wu.s $vr8, $vr8 +# CHECK-ENCODING: encoding: [0x08,0x59,0x9e,0x72] + +vftint.lu.d $vr1, $vr17 +# CHECK-INST: vftint.lu.d $vr1, $vr17 +# CHECK-ENCODING: encoding: [0x21,0x5e,0x9e,0x72] + +vftintrne.w.d $vr4, $vr18, $vr18 +# CHECK-INST: vftintrne.w.d $vr4, $vr18, $vr18 +# CHECK-ENCODING: encoding: [0x44,0xca,0x4b,0x71] + +vftintrz.w.d $vr26, $vr18, $vr4 +# CHECK-INST: vftintrz.w.d $vr26, $vr18, $vr4 +# CHECK-ENCODING: encoding: [0x5a,0x12,0x4b,0x71] + +vftintrp.w.d $vr25, $vr0, $vr23 +# CHECK-INST: vftintrp.w.d $vr25, $vr0, $vr23 +# CHECK-ENCODING: encoding: [0x19,0xdc,0x4a,0x71] + +vftintrm.w.d $vr30, $vr25, $vr5 +# CHECK-INST: vftintrm.w.d $vr30, $vr25, $vr5 +# CHECK-ENCODING: encoding: [0x3e,0x17,0x4a,0x71] + +vftint.w.d $vr27, $vr28, $vr6 +# CHECK-INST: vftint.w.d $vr27, $vr28, $vr6 +# CHECK-ENCODING: encoding: [0x9b,0x9b,0x49,0x71] + +vftintrnel.l.s $vr7, $vr8 +# CHECK-INST: vftintrnel.l.s $vr7, $vr8 +# CHECK-ENCODING: encoding: [0x07,0xa1,0x9e,0x72] + +vftintrneh.l.s $vr21, $vr26 +# CHECK-INST: vftintrneh.l.s $vr21, $vr26 +# CHECK-ENCODING: encoding: [0x55,0xa7,0x9e,0x72] + +vftintrzl.l.s $vr21, $vr18 +# CHECK-INST: vftintrzl.l.s $vr21, $vr18 +# CHECK-ENCODING: encoding: [0x55,0x9a,0x9e,0x72] + +vftintrzh.l.s $vr22, $vr16 +# CHECK-INST: vftintrzh.l.s $vr22, $vr16 +# CHECK-ENCODING: encoding: [0x16,0x9e,0x9e,0x72] + +vftintrpl.l.s $vr25, $vr19 +# CHECK-INST: vftintrpl.l.s $vr25, $vr19 +# CHECK-ENCODING: encoding: [0x79,0x92,0x9e,0x72] + +vftintrph.l.s $vr11, $vr22 +# CHECK-INST: vftintrph.l.s $vr11, $vr22 +# CHECK-ENCODING: encoding: [0xcb,0x96,0x9e,0x72] + +vftintrml.l.s $vr6, $vr28 +# CHECK-INST: vftintrml.l.s $vr6, 
$vr28 +# CHECK-ENCODING: encoding: [0x86,0x8b,0x9e,0x72] + +vftintrmh.l.s $vr17, $vr11 +# CHECK-INST: vftintrmh.l.s $vr17, $vr11 +# CHECK-ENCODING: encoding: [0x71,0x8d,0x9e,0x72] + +vftintl.l.s $vr3, $vr28 +# CHECK-INST: vftintl.l.s $vr3, $vr28 +# CHECK-ENCODING: encoding: [0x83,0x83,0x9e,0x72] + +vftinth.l.s $vr11, $vr30 +# CHECK-INST: vftinth.l.s $vr11, $vr30 +# CHECK-ENCODING: encoding: [0xcb,0x87,0x9e,0x72] diff --git a/llvm/test/MC/LoongArch/lsx/haddw.s b/llvm/test/MC/LoongArch/lsx/haddw.s new file mode 100644 index 0000000000000000000000000000000000000000..1958941fa90f6a51e0dab1524b8ebea1c21eb5fc --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/haddw.s @@ -0,0 +1,36 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vhaddw.h.b $vr3, $vr14, $vr11 +# CHECK-INST: vhaddw.h.b $vr3, $vr14, $vr11 +# CHECK-ENCODING: encoding: [0xc3,0x2d,0x54,0x70] + +vhaddw.w.h $vr3, $vr9, $vr9 +# CHECK-INST: vhaddw.w.h $vr3, $vr9, $vr9 +# CHECK-ENCODING: encoding: [0x23,0xa5,0x54,0x70] + +vhaddw.d.w $vr7, $vr26, $vr6 +# CHECK-INST: vhaddw.d.w $vr7, $vr26, $vr6 +# CHECK-ENCODING: encoding: [0x47,0x1b,0x55,0x70] + +vhaddw.q.d $vr22, $vr25, $vr19 +# CHECK-INST: vhaddw.q.d $vr22, $vr25, $vr19 +# CHECK-ENCODING: encoding: [0x36,0xcf,0x55,0x70] + +vhaddw.hu.bu $vr8, $vr21, $vr21 +# CHECK-INST: vhaddw.hu.bu $vr8, $vr21, $vr21 +# CHECK-ENCODING: encoding: [0xa8,0x56,0x58,0x70] + +vhaddw.wu.hu $vr23, $vr23, $vr20 +# CHECK-INST: vhaddw.wu.hu $vr23, $vr23, $vr20 +# CHECK-ENCODING: encoding: [0xf7,0xd2,0x58,0x70] + +vhaddw.du.wu $vr13, $vr7, $vr6 +# CHECK-INST: vhaddw.du.wu $vr13, $vr7, $vr6 +# CHECK-ENCODING: encoding: [0xed,0x18,0x59,0x70] + +vhaddw.qu.du $vr19, $vr12, $vr6 +# CHECK-INST: vhaddw.qu.du $vr19, $vr12, $vr6 +# CHECK-ENCODING: encoding: [0x93,0x99,0x59,0x70] diff --git a/llvm/test/MC/LoongArch/lsx/hsubw.s b/llvm/test/MC/LoongArch/lsx/hsubw.s new file mode 100644 index 0000000000000000000000000000000000000000..933f5840b8c63c5414441e57ab4776ae054e9886 --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/hsubw.s @@ -0,0 +1,36 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vhsubw.h.b $vr24, $vr26, $vr16 +# CHECK-INST: vhsubw.h.b $vr24, $vr26, $vr16 +# CHECK-ENCODING: encoding: [0x58,0x43,0x56,0x70] + +vhsubw.w.h $vr5, $vr28, $vr12 +# CHECK-INST: vhsubw.w.h $vr5, $vr28, $vr12 +# CHECK-ENCODING: encoding: [0x85,0xb3,0x56,0x70] + +vhsubw.d.w $vr8, $vr5, $vr22 +# CHECK-INST: vhsubw.d.w $vr8, $vr5, $vr22 +# CHECK-ENCODING: encoding: [0xa8,0x58,0x57,0x70] + +vhsubw.q.d $vr21, $vr16, $vr14 +# CHECK-INST: vhsubw.q.d $vr21, $vr16, $vr14 +# CHECK-ENCODING: encoding: [0x15,0xba,0x57,0x70] + +vhsubw.hu.bu $vr12, $vr31, $vr30 +# CHECK-INST: vhsubw.hu.bu $vr12, $vr31, $vr30 +# CHECK-ENCODING: encoding: [0xec,0x7b,0x5a,0x70] + +vhsubw.wu.hu $vr18, $vr13, $vr31 +# CHECK-INST: vhsubw.wu.hu $vr18, $vr13, $vr31 +# CHECK-ENCODING: encoding: [0xb2,0xfd,0x5a,0x70] + +vhsubw.du.wu $vr0, $vr1, $vr2 +# CHECK-INST: vhsubw.du.wu $vr0, $vr1, $vr2 +# CHECK-ENCODING: encoding: [0x20,0x08,0x5b,0x70] + +vhsubw.qu.du $vr30, $vr31, $vr5 +# CHECK-INST: vhsubw.qu.du $vr30, $vr31, $vr5 +# CHECK-ENCODING: encoding: [0xfe,0x97,0x5b,0x70] diff --git 
a/llvm/test/MC/LoongArch/lsx/ilv.s b/llvm/test/MC/LoongArch/lsx/ilv.s new file mode 100644 index 0000000000000000000000000000000000000000..8775e1ce19d3d6ba1670041c3a6ad4f8c9cb453a --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/ilv.s @@ -0,0 +1,36 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vilvl.b $vr9, $vr30, $vr20 +# CHECK-INST: vilvl.b $vr9, $vr30, $vr20 +# CHECK-ENCODING: encoding: [0xc9,0x53,0x1a,0x71] + +vilvl.h $vr6, $vr19, $vr30 +# CHECK-INST: vilvl.h $vr6, $vr19, $vr30 +# CHECK-ENCODING: encoding: [0x66,0xfa,0x1a,0x71] + +vilvl.w $vr18, $vr3, $vr15 +# CHECK-INST: vilvl.w $vr18, $vr3, $vr15 +# CHECK-ENCODING: encoding: [0x72,0x3c,0x1b,0x71] + +vilvl.d $vr20, $vr22, $vr9 +# CHECK-INST: vilvl.d $vr20, $vr22, $vr9 +# CHECK-ENCODING: encoding: [0xd4,0xa6,0x1b,0x71] + +vilvh.b $vr14, $vr4, $vr12 +# CHECK-INST: vilvh.b $vr14, $vr4, $vr12 +# CHECK-ENCODING: encoding: [0x8e,0x30,0x1c,0x71] + +vilvh.h $vr2, $vr0, $vr6 +# CHECK-INST: vilvh.h $vr2, $vr0, $vr6 +# CHECK-ENCODING: encoding: [0x02,0x98,0x1c,0x71] + +vilvh.w $vr7, $vr27, $vr15 +# CHECK-INST: vilvh.w $vr7, $vr27, $vr15 +# CHECK-ENCODING: encoding: [0x67,0x3f,0x1d,0x71] + +vilvh.d $vr9, $vr25, $vr29 +# CHECK-INST: vilvh.d $vr9, $vr25, $vr29 +# CHECK-ENCODING: encoding: [0x29,0xf7,0x1d,0x71] diff --git a/llvm/test/MC/LoongArch/lsx/insgr2vr.s b/llvm/test/MC/LoongArch/lsx/insgr2vr.s new file mode 100644 index 0000000000000000000000000000000000000000..b8b8fe7fb4dfaa3bc7dd6e3c595d08374d2ce45e --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/insgr2vr.s @@ -0,0 +1,20 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vinsgr2vr.b $vr23, $r20, 2 +# CHECK-INST: vinsgr2vr.b $vr23, $t8, 2 +# CHECK-ENCODING: encoding: [0x97,0x8a,0xeb,0x72] + +vinsgr2vr.h $vr7, $r5, 7 +# CHECK-INST: vinsgr2vr.h $vr7, $a1, 7 +# CHECK-ENCODING: encoding: [0xa7,0xdc,0xeb,0x72] + +vinsgr2vr.w $vr8, $r6, 2 +# CHECK-INST: vinsgr2vr.w $vr8, $a2, 2 +# CHECK-ENCODING: encoding: [0xc8,0xe8,0xeb,0x72] + +vinsgr2vr.d $vr17, $r24, 1 +# CHECK-INST: vinsgr2vr.d $vr17, $s1, 1 +# CHECK-ENCODING: encoding: [0x11,0xf7,0xeb,0x72] diff --git a/llvm/test/MC/LoongArch/lsx/invalid-imm.s b/llvm/test/MC/LoongArch/lsx/invalid-imm.s new file mode 100644 index 0000000000000000000000000000000000000000..c3f9aaa082818ee7b5a5a1a70fba325474ed82ca --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/invalid-imm.s @@ -0,0 +1,1192 @@ +## Test out of range immediates which are used by lsx instructions. 
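+## Each operand class is probed with the two nearest out-of-range values,
+## one below the minimum and one above the maximum, so the accepted range is
+## pinned from both sides. The expected diagnostics use FileCheck's
+## [[#@LINE-1]] numeric expression to refer back to the instruction on the
+## previous line, and the column number gives the position of the offending
+## immediate: in the first case below, "vstelm.d $vr0, $a0, 8, -1", the
+## immediate "-1" starts at column 24, hence the ":24:" in its check line.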
+ +# RUN: not llvm-mc --triple=loongarch64 %s 2>&1 | FileCheck %s + +## uimm1 +vstelm.d $vr0, $a0, 8, -1 +# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 1] + +vstelm.d $vr0, $a0, 8, 2 +# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 1] + +vreplvei.d $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 1] + +vreplvei.d $vr0, $vr1, 2 +# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 1] + +vpickve2gr.du $a0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 1] + +vpickve2gr.du $a0, $vr1, 2 +# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 1] + +vpickve2gr.d $a0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 1] + +vpickve2gr.d $a0, $vr1, 2 +# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 1] + +vinsgr2vr.d $vr0, $a0, -1 +# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 1] + +vinsgr2vr.d $vr0, $a0, 2 +# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 1] + +## uimm2 +vstelm.w $vr0, $a0, 4, -1 +# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 3] + +vstelm.w $vr0, $a0, 4, 4 +# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 3] + +vreplvei.w $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 3] + +vreplvei.w $vr0, $vr1, 4 +# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 3] + +vpickve2gr.wu $a0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 3] + +vpickve2gr.wu $a0, $vr1, 4 +# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 3] + +vpickve2gr.w $a0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 3] + +vpickve2gr.w $a0, $vr1, 4 +# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 3] + +vinsgr2vr.w $vr0, $a0, -1 +# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 3] + +vinsgr2vr.w $vr0, $a0, 4 +# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 3] + +## uimm3 +vstelm.h $vr0, $a0, 2, -1 +# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 7] + +vstelm.h $vr0, $a0, 2, 8 +# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 7] + +vreplvei.h $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 7] + +vreplvei.h $vr0, $vr1, 8 +# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 7] + +vpickve2gr.hu $a0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 7] + +vpickve2gr.hu $a0, $vr1, 8 +# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 7] + +vpickve2gr.h $a0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 7] + +vpickve2gr.h $a0, $vr1, 8 +# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 7] + +vinsgr2vr.h $vr0, $a0, -1 +# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 7] + +vinsgr2vr.h $vr0, $a0, 8 +# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 7] + +vbitrevi.b $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:24: error: 
immediate must be an integer in the range [0, 7] + +vbitrevi.b $vr0, $vr1, 8 +# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 7] + +vbitseti.b $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 7] + +vbitseti.b $vr0, $vr1, 8 +# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 7] + +vbitclri.b $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 7] + +vbitclri.b $vr0, $vr1, 8 +# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 7] + +vsrari.b $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 7] + +vsrari.b $vr0, $vr1, 8 +# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 7] + +vsrlri.b $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 7] + +vsrlri.b $vr0, $vr1, 8 +# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 7] + +vsllwil.hu.bu $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 7] + +vsllwil.hu.bu $vr0, $vr1, 8 +# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 7] + +vsllwil.h.b $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 7] + +vsllwil.h.b $vr0, $vr1, 8 +# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 7] + +vrotri.b $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 7] + +vrotri.b $vr0, $vr1, 8 +# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 7] + +vsrai.b $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 7] + +vsrai.b $vr0, $vr1, 8 +# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 7] + +vsrli.b $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 7] + +vsrli.b $vr0, $vr1, 8 +# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 7] + +vslli.b $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 7] + +vslli.b $vr0, $vr1, 8 +# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 7] + +vsat.b $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:20: error: immediate must be an integer in the range [0, 7] + +vsat.b $vr0, $vr1, 8 +# CHECK: :[[#@LINE-1]]:20: error: immediate must be an integer in the range [0, 7] + +vsat.bu $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 7] + +vsat.bu $vr0, $vr1, 8 +# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 7] + +## uimm4 +vstelm.b $vr0, $a0, 1, -1 +# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 15] + +vstelm.b $vr0, $a0, 1, 16 +# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 15] + +vreplvei.b $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 15] + +vreplvei.b $vr0, $vr1, 16 +# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 15] + +vpickve2gr.bu $a0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15] + +vpickve2gr.bu $a0, $vr1, 16 +# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15] + +vpickve2gr.b $a0, $vr1, -1 
+# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] + +vpickve2gr.b $a0, $vr1, 16 +# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] + +vinsgr2vr.b $vr0, $a0, -1 +# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 15] + +vinsgr2vr.b $vr0, $a0, 16 +# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 15] + +vbitrevi.h $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 15] + +vbitrevi.h $vr0, $vr1, 16 +# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 15] + +vbitseti.h $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 15] + +vbitseti.h $vr0, $vr1, 16 +# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 15] + +vbitclri.h $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 15] + +vbitclri.h $vr0, $vr1, 16 +# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 15] + +vssrarni.bu.h $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 15] + +vssrarni.bu.h $vr0, $vr1, 16 +# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 15] + +vssrlrni.bu.h $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 15] + +vssrlrni.bu.h $vr0, $vr1, 16 +# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 15] + +vssrarni.b.h $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15] + +vssrarni.b.h $vr0, $vr1, 16 +# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15] + +vssrlrni.b.h $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15] + +vssrlrni.b.h $vr0, $vr1, 16 +# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15] + +vssrani.bu.h $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15] + +vssrani.bu.h $vr0, $vr1, 16 +# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15] + +vssrlni.bu.h $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15] + +vssrlni.bu.h $vr0, $vr1, 16 +# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 15] + +vssrani.b.h $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] + +vssrani.b.h $vr0, $vr1, 16 +# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] + +vssrlni.b.h $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] + +vssrlni.b.h $vr0, $vr1, 16 +# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] + +vsrarni.b.h $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] + +vsrarni.b.h $vr0, $vr1, 16 +# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] + +vsrlrni.b.h $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] + +vsrlrni.b.h $vr0, $vr1, 16 +# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] + +vsrani.b.h $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 15] + 
+vsrani.b.h $vr0, $vr1, 16 +# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 15] + +vsrlni.b.h $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 15] + +vsrlni.b.h $vr0, $vr1, 16 +# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 15] + +vsrari.h $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 15] + +vsrari.h $vr0, $vr1, 16 +# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 15] + +vsrlri.h $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 15] + +vsrlri.h $vr0, $vr1, 16 +# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 15] + +vsllwil.wu.hu $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 15] + +vsllwil.wu.hu $vr0, $vr1, 16 +# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 15] + +vsllwil.w.h $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] + +vsllwil.w.h $vr0, $vr1, 16 +# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 15] + +vrotri.h $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 15] + +vrotri.h $vr0, $vr1, 16 +# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 15] + +vsrai.h $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 15] + +vsrai.h $vr0, $vr1, 16 +# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 15] + +vsrli.h $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 15] + +vsrli.h $vr0, $vr1, 16 +# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 15] + +vslli.h $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 15] + +vslli.h $vr0, $vr1, 16 +# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 15] + +vsat.h $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:20: error: immediate must be an integer in the range [0, 15] + +vsat.h $vr0, $vr1, 16 +# CHECK: :[[#@LINE-1]]:20: error: immediate must be an integer in the range [0, 15] + +vsat.hu $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 15] + +vsat.hu $vr0, $vr1, 16 +# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 15] + +## uimm5 +vbsrl.v $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 31] + +vbsrl.v $vr0, $vr1, 32 +# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 31] + +vbsll.v $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 31] + +vbsll.v $vr0, $vr1, 32 +# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 31] + +vslti.du $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] + +vslti.du $vr0, $vr1, 32 +# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] + +vslti.wu $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] + +vslti.wu $vr0, $vr1, 32 +# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] + +vslti.hu $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:22: error: 
immediate must be an integer in the range [0, 31] + +vslti.hu $vr0, $vr1, 32 +# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] + +vslti.bu $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] + +vslti.bu $vr0, $vr1, 32 +# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] + +vslei.du $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] + +vslei.du $vr0, $vr1, 32 +# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] + +vslei.wu $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] + +vslei.wu $vr0, $vr1, 32 +# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] + +vslei.hu $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] + +vslei.hu $vr0, $vr1, 32 +# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] + +vslei.bu $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] + +vslei.bu $vr0, $vr1, 32 +# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] + +vfrstpi.h $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] + +vfrstpi.h $vr0, $vr1, 32 +# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] + +vfrstpi.b $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] + +vfrstpi.b $vr0, $vr1, 32 +# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 31] + +vbitrevi.w $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 31] + +vbitrevi.w $vr0, $vr1, 32 +# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 31] + +vbitseti.w $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 31] + +vbitseti.w $vr0, $vr1, 32 +# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 31] + +vbitclri.w $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 31] + +vbitclri.w $vr0, $vr1, 32 +# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 31] + +vssrarni.hu.w $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 31] + +vssrarni.hu.w $vr0, $vr1, 32 +# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 31] + +vssrlrni.hu.w $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 31] + +vssrlrni.hu.w $vr0, $vr1, 32 +# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 31] + +vssrarni.h.w $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31] + +vssrarni.h.w $vr0, $vr1, 32 +# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31] + +vssrlrni.h.w $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31] + +vssrlrni.h.w $vr0, $vr1, 32 +# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31] + +vssrani.hu.w $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31] + +vssrani.hu.w $vr0, $vr1, 32 +# CHECK: :[[#@LINE-1]]:26: error: immediate must be an 
integer in the range [0, 31] + +vssrlni.hu.w $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31] + +vssrlni.hu.w $vr0, $vr1, 32 +# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 31] + +vssrani.h.w $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31] + +vssrani.h.w $vr0, $vr1, 32 +# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31] + +vssrlni.h.w $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31] + +vssrlni.h.w $vr0, $vr1, 32 +# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31] + +vsrarni.h.w $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31] + +vsrarni.h.w $vr0, $vr1, 32 +# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31] + +vsrlrni.h.w $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31] + +vsrlrni.h.w $vr0, $vr1, 32 +# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31] + +vsrani.h.w $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 31] + +vsrani.h.w $vr0, $vr1, 32 +# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 31] + +vsrlni.h.w $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 31] + +vsrlni.h.w $vr0, $vr1, 32 +# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 31] + +vsrari.w $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] + +vsrari.w $vr0, $vr1, 32 +# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] + +vsrlri.w $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] + +vsrlri.w $vr0, $vr1, 32 +# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] + +vsllwil.du.wu $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 31] + +vsllwil.du.wu $vr0, $vr1, 32 +# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 31] + +vsllwil.d.w $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31] + +vsllwil.d.w $vr0, $vr1, 32 +# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 31] + +vrotri.w $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] + +vrotri.w $vr0, $vr1, 32 +# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] + +vsrai.w $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 31] + +vsrai.w $vr0, $vr1, 32 +# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 31] + +vsrli.w $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 31] + +vsrli.w $vr0, $vr1, 32 +# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 31] + +vslli.w $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 31] + +vslli.w $vr0, $vr1, 32 +# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 31] + +vaddi.bu $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] + 
+vaddi.bu $vr0, $vr1, 32 +# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] + +vaddi.hu $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] + +vaddi.hu $vr0, $vr1, 32 +# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] + +vaddi.wu $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] + +vaddi.wu $vr0, $vr1, 32 +# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] + +vaddi.du $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] + +vaddi.du $vr0, $vr1, 32 +# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] + +vsubi.bu $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] + +vsubi.bu $vr0, $vr1, 32 +# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] + +vsubi.hu $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] + +vsubi.hu $vr0, $vr1, 32 +# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] + +vsubi.wu $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] + +vsubi.wu $vr0, $vr1, 32 +# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] + +vsubi.du $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] + +vsubi.du $vr0, $vr1, 32 +# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] + +vmaxi.bu $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] + +vmaxi.bu $vr0, $vr1, 32 +# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] + +vmaxi.hu $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] + +vmaxi.hu $vr0, $vr1, 32 +# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] + +vmaxi.wu $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] + +vmaxi.wu $vr0, $vr1, 32 +# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] + +vmaxi.du $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] + +vmaxi.du $vr0, $vr1, 32 +# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] + +vmini.bu $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] + +vmini.bu $vr0, $vr1, 32 +# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] + +vmini.hu $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] + +vmini.hu $vr0, $vr1, 32 +# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] + +vmini.wu $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] + +vmini.wu $vr0, $vr1, 32 +# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] + +vmini.du $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] + +vmini.du $vr0, $vr1, 32 +# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 31] + +vsat.w $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:20: error: immediate must be an 
integer in the range [0, 31] + +vsat.w $vr0, $vr1, 32 +# CHECK: :[[#@LINE-1]]:20: error: immediate must be an integer in the range [0, 31] + +vsat.wu $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 31] + +vsat.wu $vr0, $vr1, 32 +# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 31] + +## simm5 +vslti.d $vr0, $vr1, -17 +# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] + +vslti.d $vr0, $vr1, 16 +# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] + +vslti.w $vr0, $vr1, -17 +# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] + +vslti.w $vr0, $vr1, 16 +# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] + +vslti.h $vr0, $vr1, -17 +# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] + +vslti.h $vr0, $vr1, 16 +# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] + +vslti.b $vr0, $vr1, -17 +# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] + +vslti.b $vr0, $vr1, 16 +# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] + +vslei.d $vr0, $vr1, -17 +# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] + +vslei.d $vr0, $vr1, 16 +# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] + +vslei.w $vr0, $vr1, -17 +# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] + +vslei.w $vr0, $vr1, 16 +# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] + +vslei.h $vr0, $vr1, -17 +# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] + +vslei.h $vr0, $vr1, 16 +# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] + +vslei.b $vr0, $vr1, -17 +# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] + +vslei.b $vr0, $vr1, 16 +# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] + +vseqi.d $vr0, $vr1, -17 +# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] + +vseqi.d $vr0, $vr1, 16 +# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] + +vseqi.w $vr0, $vr1, -17 +# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] + +vseqi.w $vr0, $vr1, 16 +# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] + +vseqi.h $vr0, $vr1, -17 +# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] + +vseqi.h $vr0, $vr1, 16 +# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] + +vseqi.b $vr0, $vr1, -17 +# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] + +vseqi.b $vr0, $vr1, 16 +# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] + +vmaxi.b $vr0, $vr1, -17 +# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] + +vmaxi.b $vr0, $vr1, 16 +# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] + +vmaxi.h $vr0, $vr1, -17 +# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] + +vmaxi.h $vr0, $vr1, 16 +# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] + 
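+## Note the split with the uimm5 cases above: the unsigned .bu/.hu/.wu/.du
+## forms take a uimm5 in [0, 31], while the plain .b/.h/.w/.d forms take a
+## simm5 in [-16, 15]. Both are 5-bit immediate fields; judging by the
+## diagnostics, the signedness of the operation decides how the field is
+## read, so the accepted extremes for, e.g., vmaxi.b are (a sketch, not a
+## tested case):
+##
+##   vmaxi.b $vr0, $vr1, -16   ## smallest accepted simm5
+##   vmaxi.b $vr0, $vr1, 15    ## largest accepted simm5
+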
+vmaxi.w $vr0, $vr1, -17 +# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] + +vmaxi.w $vr0, $vr1, 16 +# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] + +vmaxi.d $vr0, $vr1, -17 +# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] + +vmaxi.d $vr0, $vr1, 16 +# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] + +vmini.b $vr0, $vr1, -17 +# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] + +vmini.b $vr0, $vr1, 16 +# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] + +vmini.h $vr0, $vr1, -17 +# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] + +vmini.h $vr0, $vr1, 16 +# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] + +vmini.w $vr0, $vr1, -17 +# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] + +vmini.w $vr0, $vr1, 16 +# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] + +vmini.d $vr0, $vr1, -17 +# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] + +vmini.d $vr0, $vr1, 16 +# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-16, 15] + +## uimm6 +vbitrevi.d $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 63] + +vbitrevi.d $vr0, $vr1, 64 +# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 63] + +vbitseti.d $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 63] + +vbitseti.d $vr0, $vr1, 64 +# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 63] + +vbitclri.d $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 63] + +vbitclri.d $vr0, $vr1, 64 +# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 63] + +vssrarni.wu.d $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 63] + +vssrarni.wu.d $vr0, $vr1, 64 +# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 63] + +vssrlrni.wu.d $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 63] + +vssrlrni.wu.d $vr0, $vr1, 64 +# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 63] + +vssrarni.w.d $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 63] + +vssrarni.w.d $vr0, $vr1, 64 +# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 63] + +vssrlrni.w.d $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 63] + +vssrlrni.w.d $vr0, $vr1, 64 +# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 63] + +vssrani.wu.d $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 63] + +vssrani.wu.d $vr0, $vr1, 64 +# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 63] + +vssrlni.wu.d $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 63] + +vssrlni.wu.d $vr0, $vr1, 64 +# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 63] + +vssrani.w.d $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the 
range [0, 63] + +vssrani.w.d $vr0, $vr1, 64 +# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63] + +vssrlni.w.d $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63] + +vssrlni.w.d $vr0, $vr1, 64 +# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63] + +vsrarni.w.d $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63] + +vsrarni.w.d $vr0, $vr1, 64 +# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63] + +vsrlrni.w.d $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63] + +vsrlrni.w.d $vr0, $vr1, 64 +# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 63] + +vsrani.w.d $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 63] + +vsrani.w.d $vr0, $vr1, 64 +# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 63] + +vsrlni.w.d $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 63] + +vsrlni.w.d $vr0, $vr1, 64 +# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 63] + +vsrari.d $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 63] + +vsrari.d $vr0, $vr1, 64 +# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 63] + +vsrlri.d $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 63] + +vsrlri.d $vr0, $vr1, 64 +# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 63] + +vrotri.d $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 63] + +vrotri.d $vr0, $vr1, 64 +# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 63] + +vsrai.d $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 63] + +vsrai.d $vr0, $vr1, 64 +# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 63] + +vsrli.d $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 63] + +vsrli.d $vr0, $vr1, 64 +# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 63] + +vslli.d $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 63] + +vslli.d $vr0, $vr1, 64 +# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 63] + +vsat.d $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:20: error: immediate must be an integer in the range [0, 63] + +vsat.d $vr0, $vr1, 64 +# CHECK: :[[#@LINE-1]]:20: error: immediate must be an integer in the range [0, 63] + +vsat.du $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 63] + +vsat.du $vr0, $vr1, 64 +# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 63] + +## uimm7 +vssrarni.du.q $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 127] + +vssrarni.du.q $vr0, $vr1, 128 +# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 127] + +vssrlrni.du.q $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 127] + +vssrlrni.du.q $vr0, $vr1, 128 +# CHECK: :[[#@LINE-1]]:27: error: immediate must be an integer in the range [0, 127] + +vssrarni.d.q 
$vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 127] + +vssrarni.d.q $vr0, $vr1, 128 +# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 127] + +vssrlrni.d.q $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 127] + +vssrlrni.d.q $vr0, $vr1, 128 +# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 127] + +vssrani.du.q $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 127] + +vssrani.du.q $vr0, $vr1, 128 +# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 127] + +vssrlni.du.q $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 127] + +vssrlni.du.q $vr0, $vr1, 128 +# CHECK: :[[#@LINE-1]]:26: error: immediate must be an integer in the range [0, 127] + +vssrani.d.q $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 127] + +vssrani.d.q $vr0, $vr1, 128 +# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 127] + +vssrlni.d.q $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 127] + +vssrlni.d.q $vr0, $vr1, 128 +# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 127] + +vsrarni.d.q $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 127] + +vsrarni.d.q $vr0, $vr1, 128 +# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 127] + +vsrlrni.d.q $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 127] + +vsrlrni.d.q $vr0, $vr1, 128 +# CHECK: :[[#@LINE-1]]:25: error: immediate must be an integer in the range [0, 127] + +vsrani.d.q $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 127] + +vsrani.d.q $vr0, $vr1, 128 +# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 127] + +vsrlni.d.q $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 127] + +vsrlni.d.q $vr0, $vr1, 128 +# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 127] + +## uimm8 +vextrins.d $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255] + +vextrins.d $vr0, $vr1, 256 +# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255] + +vextrins.w $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255] + +vextrins.w $vr0, $vr1, 256 +# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255] + +vextrins.h $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255] + +vextrins.h $vr0, $vr1, 256 +# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255] + +vextrins.b $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255] + +vextrins.b $vr0, $vr1, 256 +# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255] + +vpermi.w $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 255] + +vpermi.w $vr0, $vr1, 256 +# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [0, 255] + +vshuf4i.d $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:23: error: immediate 
must be an integer in the range [0, 255] + +vshuf4i.d $vr0, $vr1, 256 +# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 255] + +vshuf4i.w $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 255] + +vshuf4i.w $vr0, $vr1, 256 +# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 255] + +vshuf4i.h $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 255] + +vshuf4i.h $vr0, $vr1, 256 +# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 255] + +vshuf4i.b $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 255] + +vshuf4i.b $vr0, $vr1, 256 +# CHECK: :[[#@LINE-1]]:23: error: immediate must be an integer in the range [0, 255] + +vbitseli.b $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255] + +vbitseli.b $vr0, $vr1, 256 +# CHECK: :[[#@LINE-1]]:24: error: immediate must be an integer in the range [0, 255] + +vandi.b $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 255] + +vandi.b $vr0, $vr1, 256 +# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 255] + +vori.b $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:20: error: immediate must be an integer in the range [0, 255] + +vori.b $vr0, $vr1, 256 +# CHECK: :[[#@LINE-1]]:20: error: immediate must be an integer in the range [0, 255] + +vxori.b $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 255] + +vxori.b $vr0, $vr1, 256 +# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 255] + +vnori.b $vr0, $vr1, -1 +# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 255] + +vnori.b $vr0, $vr1, 256 +# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [0, 255] + +## simm8 +vstelm.b $vr0, $a0, -129, 1 +# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-128, 127] + +vstelm.b $vr0, $a0, 128, 1 +# CHECK: :[[#@LINE-1]]:21: error: immediate must be an integer in the range [-128, 127] + +## simm8_lsl1 +vstelm.h $vr0, $a0, -258, 1 +# CHECK: :[[#@LINE-1]]:21: error: immediate must be a multiple of 2 in the range [-256, 254] + +vstelm.h $vr0, $a0, 256, 1 +# CHECK: :[[#@LINE-1]]:21: error: immediate must be a multiple of 2 in the range [-256, 254] + +## simm8_lsl2 +vstelm.w $vr0, $a0, -516, 1 +# CHECK: :[[#@LINE-1]]:21: error: immediate must be a multiple of 4 in the range [-512, 508] + +vstelm.w $vr0, $a0, 512, 1 +# CHECK: :[[#@LINE-1]]:21: error: immediate must be a multiple of 4 in the range [-512, 508] + +## simm8_lsl3 +vstelm.d $vr0, $a0, -1032, 1 +# CHECK: :[[#@LINE-1]]:21: error: immediate must be a multiple of 8 in the range [-1024, 1016] + +vstelm.d $vr0, $a0, 1024, 1 +# CHECK: :[[#@LINE-1]]:21: error: immediate must be a multiple of 8 in the range [-1024, 1016] + +## simm9_lsl3 +vldrepl.d $vr0, $a0, -2056 +# CHECK: :[[#@LINE-1]]:22: error: immediate must be a multiple of 8 in the range [-2048, 2040] + +vldrepl.d $vr0, $a0, 2048 +# CHECK: :[[#@LINE-1]]:22: error: immediate must be a multiple of 8 in the range [-2048, 2040] + +## simm10_lsl2 +vldrepl.w $vr0, $a0, -2052 +# CHECK: :[[#@LINE-1]]:22: error: immediate must be a multiple of 4 in the range [-2048, 2044] + +vldrepl.w $vr0, $a0, 2048 +# CHECK: :[[#@LINE-1]]:22: error: immediate must be a multiple of 4 in the range [-2048, 2044] + 
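+## The simm<s>_lsl<N> classes in this file are s-bit signed immediates
+## shifted left by N bits, so they accept exactly the multiples of 2^N in
+## [-2^(s-1+N), (2^(s-1)-1)*2^N]. As a worked check against the diagnostics:
+## simm8_lsl3 gives multiples of 8 in [-2^10, 127*8] = [-1024, 1016], and
+## simm9_lsl3 gives multiples of 8 in [-2^11, 255*8] = [-2048, 2040].
+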
+## simm10 +vrepli.b $vr0, -513 +# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-512, 511] + +vrepli.b $vr0, 512 +# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-512, 511] + +vrepli.h $vr0, -513 +# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-512, 511] + +vrepli.h $vr0, 512 +# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-512, 511] + +vrepli.w $vr0, -513 +# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-512, 511] + +vrepli.w $vr0, 512 +# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-512, 511] + +vrepli.d $vr0, -513 +# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-512, 511] + +vrepli.d $vr0, 512 +# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-512, 511] + +## simm11_lsl1 +vldrepl.h $vr0, $a0, -2050 +# CHECK: :[[#@LINE-1]]:22: error: immediate must be a multiple of 2 in the range [-2048, 2046] + +vldrepl.h $vr0, $a0, 2048 +# CHECK: :[[#@LINE-1]]:22: error: immediate must be a multiple of 2 in the range [-2048, 2046] + +## simm12 +vldrepl.b $vr0, $a0, -2049 +# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-2048, 2047] + +vldrepl.b $vr0, $a0, 2048 +# CHECK: :[[#@LINE-1]]:22: error: immediate must be an integer in the range [-2048, 2047] + +vst $vr0, $a0, -2049 +# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-2048, 2047] + +vst $vr0, $a0, 2048 +# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-2048, 2047] + +vld $vr0, $a0, -2049 +# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-2048, 2047] + +vld $vr0, $a0, 2048 +# CHECK: :[[#@LINE-1]]:16: error: immediate must be an integer in the range [-2048, 2047] + +## simm13 +vldi $vr0, -4097 +# CHECK: :[[#@LINE-1]]:12: error: immediate must be an integer in the range [-4096, 4095] + +vldi $vr0, 4096 +# CHECK: :[[#@LINE-1]]:12: error: immediate must be an integer in the range [-4096, 4095] diff --git a/llvm/test/MC/LoongArch/lsx/ld.s b/llvm/test/MC/LoongArch/lsx/ld.s new file mode 100644 index 0000000000000000000000000000000000000000..642c842eb6e81268acb30dbbbede20c110cf7d30 --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/ld.s @@ -0,0 +1,12 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vld $vr0, $r12, -536 +# CHECK-INST: vld $vr0, $t0, -536 +# CHECK-ENCODING: encoding: [0x80,0xa1,0x37,0x2c] + +vldx $vr21, $r14, $r20 +# CHECK-INST: vldx $vr21, $t2, $t8 +# CHECK-ENCODING: encoding: [0xd5,0x51,0x40,0x38] diff --git a/llvm/test/MC/LoongArch/lsx/ldi.s b/llvm/test/MC/LoongArch/lsx/ldi.s new file mode 100644 index 0000000000000000000000000000000000000000..59a1c2b27f4ee8dbca9d10f2494fadf3dca6d3ea --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/ldi.s @@ -0,0 +1,29 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-OBJ + +vldi $vr26, -3212 +# CHECK-INST: vldi $vr26, -3212 +# CHECK-ENCODING: encoding: [0x9a,0x6e,0xe2,0x73] +# CHECK-OBJ: vldi $vr26, -3212 + +vrepli.b $vr26, -512 +# CHECK-INST: vrepli.b $vr26, -512 +# 
CHECK-ENCODING: encoding: [0x1a,0x40,0xe0,0x73] +# CHECK-OBJ: vldi $vr26, 512 + +vrepli.h $vr26, -512 +# CHECK-INST: vrepli.h $vr26, -512 +# CHECK-ENCODING: encoding: [0x1a,0xc0,0xe0,0x73] +# CHECK-OBJ: vldi $vr26, 1536 + +vrepli.w $vr26, -512 +# CHECK-INST: vrepli.w $vr26, -512 +# CHECK-ENCODING: encoding: [0x1a,0x40,0xe1,0x73] +# CHECK-OBJ: vldi $vr26, 2560 + +vrepli.d $vr26, -512 +# CHECK-INST: vrepli.d $vr26, -512 +# CHECK-ENCODING: encoding: [0x1a,0xc0,0xe1,0x73] +# CHECK-OBJ: vldi $vr26, 3584 diff --git a/llvm/test/MC/LoongArch/lsx/ldrepl.s b/llvm/test/MC/LoongArch/lsx/ldrepl.s new file mode 100644 index 0000000000000000000000000000000000000000..75830ae20f2a1075058834120835792f04a18689 --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/ldrepl.s @@ -0,0 +1,20 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vldrepl.b $vr3, $r3, -1553 +# CHECK-INST: vldrepl.b $vr3, $sp, -1553 +# CHECK-ENCODING: encoding: [0x63,0xbc,0xa7,0x30] + +vldrepl.h $vr23, $r22, 172 +# CHECK-INST: vldrepl.h $vr23, $fp, 172 +# CHECK-ENCODING: encoding: [0xd7,0x5a,0x41,0x30] + +vldrepl.w $vr12, $r27, -1304 +# CHECK-INST: vldrepl.w $vr12, $s4, -1304 +# CHECK-ENCODING: encoding: [0x6c,0xeb,0x2a,0x30] + +vldrepl.d $vr7, $r31, -1376 +# CHECK-INST: vldrepl.d $vr7, $s8, -1376 +# CHECK-ENCODING: encoding: [0xe7,0x53,0x15,0x30] diff --git a/llvm/test/MC/LoongArch/lsx/madd.s b/llvm/test/MC/LoongArch/lsx/madd.s new file mode 100644 index 0000000000000000000000000000000000000000..9ae6500c2efb6f296f81cce975b00a35cf88dbbf --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/madd.s @@ -0,0 +1,20 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vmadd.b $vr13, $vr5, $vr10 +# CHECK-INST: vmadd.b $vr13, $vr5, $vr10 +# CHECK-ENCODING: encoding: [0xad,0x28,0xa8,0x70] + +vmadd.h $vr11, $vr15, $vr8 +# CHECK-INST: vmadd.h $vr11, $vr15, $vr8 +# CHECK-ENCODING: encoding: [0xeb,0xa1,0xa8,0x70] + +vmadd.w $vr5, $vr17, $vr16 +# CHECK-INST: vmadd.w $vr5, $vr17, $vr16 +# CHECK-ENCODING: encoding: [0x25,0x42,0xa9,0x70] + +vmadd.d $vr29, $vr11, $vr12 +# CHECK-INST: vmadd.d $vr29, $vr11, $vr12 +# CHECK-ENCODING: encoding: [0x7d,0xb1,0xa9,0x70] diff --git a/llvm/test/MC/LoongArch/lsx/maddw.s b/llvm/test/MC/LoongArch/lsx/maddw.s new file mode 100644 index 0000000000000000000000000000000000000000..f346ea7860105f9dfa1e8cc00b25bf2f55706ee4 --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/maddw.s @@ -0,0 +1,100 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vmaddwev.h.b $vr20, $vr27, $vr19 +# CHECK-INST: vmaddwev.h.b $vr20, $vr27, $vr19 +# CHECK-ENCODING: encoding: [0x74,0x4f,0xac,0x70] + +vmaddwev.w.h $vr6, $vr21, $vr19 +# CHECK-INST: vmaddwev.w.h $vr6, $vr21, $vr19 +# CHECK-ENCODING: encoding: [0xa6,0xce,0xac,0x70] + +vmaddwev.d.w $vr9, $vr20, $vr22 +# CHECK-INST: vmaddwev.d.w $vr9, $vr20, $vr22 +# CHECK-ENCODING: encoding: [0x89,0x5a,0xad,0x70] + +vmaddwev.q.d $vr11, $vr22, $vr5 +# CHECK-INST: vmaddwev.q.d $vr11, $vr22, $vr5 +# CHECK-ENCODING: 
encoding: [0xcb,0x96,0xad,0x70] + +vmaddwev.h.bu $vr7, $vr24, $vr12 +# CHECK-INST: vmaddwev.h.bu $vr7, $vr24, $vr12 +# CHECK-ENCODING: encoding: [0x07,0x33,0xb4,0x70] + +vmaddwev.w.hu $vr14, $vr10, $vr2 +# CHECK-INST: vmaddwev.w.hu $vr14, $vr10, $vr2 +# CHECK-ENCODING: encoding: [0x4e,0x89,0xb4,0x70] + +vmaddwev.d.wu $vr25, $vr22, $vr30 +# CHECK-INST: vmaddwev.d.wu $vr25, $vr22, $vr30 +# CHECK-ENCODING: encoding: [0xd9,0x7a,0xb5,0x70] + +vmaddwev.q.du $vr4, $vr5, $vr10 +# CHECK-INST: vmaddwev.q.du $vr4, $vr5, $vr10 +# CHECK-ENCODING: encoding: [0xa4,0xa8,0xb5,0x70] + +vmaddwev.h.bu.b $vr13, $vr17, $vr6 +# CHECK-INST: vmaddwev.h.bu.b $vr13, $vr17, $vr6 +# CHECK-ENCODING: encoding: [0x2d,0x1a,0xbc,0x70] + +vmaddwev.w.hu.h $vr1, $vr29, $vr13 +# CHECK-INST: vmaddwev.w.hu.h $vr1, $vr29, $vr13 +# CHECK-ENCODING: encoding: [0xa1,0xb7,0xbc,0x70] + +vmaddwev.d.wu.w $vr5, $vr13, $vr10 +# CHECK-INST: vmaddwev.d.wu.w $vr5, $vr13, $vr10 +# CHECK-ENCODING: encoding: [0xa5,0x29,0xbd,0x70] + +vmaddwev.q.du.d $vr16, $vr0, $vr26 +# CHECK-INST: vmaddwev.q.du.d $vr16, $vr0, $vr26 +# CHECK-ENCODING: encoding: [0x10,0xe8,0xbd,0x70] + +vmaddwod.h.b $vr29, $vr28, $vr11 +# CHECK-INST: vmaddwod.h.b $vr29, $vr28, $vr11 +# CHECK-ENCODING: encoding: [0x9d,0x2f,0xae,0x70] + +vmaddwod.w.h $vr10, $vr5, $vr29 +# CHECK-INST: vmaddwod.w.h $vr10, $vr5, $vr29 +# CHECK-ENCODING: encoding: [0xaa,0xf4,0xae,0x70] + +vmaddwod.d.w $vr16, $vr7, $vr26 +# CHECK-INST: vmaddwod.d.w $vr16, $vr7, $vr26 +# CHECK-ENCODING: encoding: [0xf0,0x68,0xaf,0x70] + +vmaddwod.q.d $vr1, $vr4, $vr7 +# CHECK-INST: vmaddwod.q.d $vr1, $vr4, $vr7 +# CHECK-ENCODING: encoding: [0x81,0x9c,0xaf,0x70] + +vmaddwod.h.bu $vr9, $vr28, $vr19 +# CHECK-INST: vmaddwod.h.bu $vr9, $vr28, $vr19 +# CHECK-ENCODING: encoding: [0x89,0x4f,0xb6,0x70] + +vmaddwod.w.hu $vr4, $vr6, $vr19 +# CHECK-INST: vmaddwod.w.hu $vr4, $vr6, $vr19 +# CHECK-ENCODING: encoding: [0xc4,0xcc,0xb6,0x70] + +vmaddwod.d.wu $vr2, $vr26, $vr26 +# CHECK-INST: vmaddwod.d.wu $vr2, $vr26, $vr26 +# CHECK-ENCODING: encoding: [0x42,0x6b,0xb7,0x70] + +vmaddwod.q.du $vr9, $vr18, $vr31 +# CHECK-INST: vmaddwod.q.du $vr9, $vr18, $vr31 +# CHECK-ENCODING: encoding: [0x49,0xfe,0xb7,0x70] + +vmaddwod.h.bu.b $vr22, $vr3, $vr25 +# CHECK-INST: vmaddwod.h.bu.b $vr22, $vr3, $vr25 +# CHECK-ENCODING: encoding: [0x76,0x64,0xbe,0x70] + +vmaddwod.w.hu.h $vr17, $vr20, $vr22 +# CHECK-INST: vmaddwod.w.hu.h $vr17, $vr20, $vr22 +# CHECK-ENCODING: encoding: [0x91,0xda,0xbe,0x70] + +vmaddwod.d.wu.w $vr21, $vr14, $vr6 +# CHECK-INST: vmaddwod.d.wu.w $vr21, $vr14, $vr6 +# CHECK-ENCODING: encoding: [0xd5,0x19,0xbf,0x70] + +vmaddwod.q.du.d $vr8, $vr15, $vr11 +# CHECK-INST: vmaddwod.q.du.d $vr8, $vr15, $vr11 +# CHECK-ENCODING: encoding: [0xe8,0xad,0xbf,0x70] diff --git a/llvm/test/MC/LoongArch/lsx/max.s b/llvm/test/MC/LoongArch/lsx/max.s new file mode 100644 index 0000000000000000000000000000000000000000..2761f6913a86bc9b4f9add6c2e973741222c2ae6 --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/max.s @@ -0,0 +1,68 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vmax.b $vr6, $vr21, $vr16 +# CHECK-INST: vmax.b $vr6, $vr21, $vr16 +# CHECK-ENCODING: encoding: [0xa6,0x42,0x70,0x70] + +vmax.h $vr9, $vr28, $vr16 +# CHECK-INST: vmax.h $vr9, $vr28, $vr16 +# CHECK-ENCODING: encoding: [0x89,0xc3,0x70,0x70] + +vmax.w $vr6, $vr0, $vr9 +# CHECK-INST: 
vmax.w $vr6, $vr0, $vr9 +# CHECK-ENCODING: encoding: [0x06,0x24,0x71,0x70] + +vmax.d $vr26, $vr3, $vr0 +# CHECK-INST: vmax.d $vr26, $vr3, $vr0 +# CHECK-ENCODING: encoding: [0x7a,0x80,0x71,0x70] + +vmaxi.b $vr2, $vr21, -8 +# CHECK-INST: vmaxi.b $vr2, $vr21, -8 +# CHECK-ENCODING: encoding: [0xa2,0x62,0x90,0x72] + +vmaxi.h $vr2, $vr21, -2 +# CHECK-INST: vmaxi.h $vr2, $vr21, -2 +# CHECK-ENCODING: encoding: [0xa2,0xfa,0x90,0x72] + +vmaxi.w $vr26, $vr21, -9 +# CHECK-INST: vmaxi.w $vr26, $vr21, -9 +# CHECK-ENCODING: encoding: [0xba,0x5e,0x91,0x72] + +vmaxi.d $vr30, $vr28, -2 +# CHECK-INST: vmaxi.d $vr30, $vr28, -2 +# CHECK-ENCODING: encoding: [0x9e,0xfb,0x91,0x72] + +vmax.bu $vr8, $vr7, $vr7 +# CHECK-INST: vmax.bu $vr8, $vr7, $vr7 +# CHECK-ENCODING: encoding: [0xe8,0x1c,0x74,0x70] + +vmax.hu $vr21, $vr10, $vr11 +# CHECK-INST: vmax.hu $vr21, $vr10, $vr11 +# CHECK-ENCODING: encoding: [0x55,0xad,0x74,0x70] + +vmax.wu $vr24, $vr13, $vr25 +# CHECK-INST: vmax.wu $vr24, $vr13, $vr25 +# CHECK-ENCODING: encoding: [0xb8,0x65,0x75,0x70] + +vmax.du $vr23, $vr11, $vr14 +# CHECK-INST: vmax.du $vr23, $vr11, $vr14 +# CHECK-ENCODING: encoding: [0x77,0xb9,0x75,0x70] + +vmaxi.bu $vr2, $vr9, 18 +# CHECK-INST: vmaxi.bu $vr2, $vr9, 18 +# CHECK-ENCODING: encoding: [0x22,0x49,0x94,0x72] + +vmaxi.hu $vr11, $vr23, 18 +# CHECK-INST: vmaxi.hu $vr11, $vr23, 18 +# CHECK-ENCODING: encoding: [0xeb,0xca,0x94,0x72] + +vmaxi.wu $vr15, $vr0, 29 +# CHECK-INST: vmaxi.wu $vr15, $vr0, 29 +# CHECK-ENCODING: encoding: [0x0f,0x74,0x95,0x72] + +vmaxi.du $vr20, $vr1, 14 +# CHECK-INST: vmaxi.du $vr20, $vr1, 14 +# CHECK-ENCODING: encoding: [0x34,0xb8,0x95,0x72] diff --git a/llvm/test/MC/LoongArch/lsx/min.s b/llvm/test/MC/LoongArch/lsx/min.s new file mode 100644 index 0000000000000000000000000000000000000000..7843f95ea80e98fb5d443f7e98836cefda598815 --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/min.s @@ -0,0 +1,68 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vmin.b $vr24, $vr31, $vr5 +# CHECK-INST: vmin.b $vr24, $vr31, $vr5 +# CHECK-ENCODING: encoding: [0xf8,0x17,0x72,0x70] + +vmin.h $vr8, $vr17, $vr29 +# CHECK-INST: vmin.h $vr8, $vr17, $vr29 +# CHECK-ENCODING: encoding: [0x28,0xf6,0x72,0x70] + +vmin.w $vr6, $vr31, $vr20 +# CHECK-INST: vmin.w $vr6, $vr31, $vr20 +# CHECK-ENCODING: encoding: [0xe6,0x53,0x73,0x70] + +vmin.d $vr5, $vr11, $vr14 +# CHECK-INST: vmin.d $vr5, $vr11, $vr14 +# CHECK-ENCODING: encoding: [0x65,0xb9,0x73,0x70] + +vmini.b $vr8, $vr28, 0 +# CHECK-INST: vmini.b $vr8, $vr28, 0 +# CHECK-ENCODING: encoding: [0x88,0x03,0x92,0x72] + +vmini.h $vr12, $vr12, 0 +# CHECK-INST: vmini.h $vr12, $vr12, 0 +# CHECK-ENCODING: encoding: [0x8c,0x81,0x92,0x72] + +vmini.w $vr17, $vr1, 4 +# CHECK-INST: vmini.w $vr17, $vr1, 4 +# CHECK-ENCODING: encoding: [0x31,0x10,0x93,0x72] + +vmini.d $vr13, $vr2, -14 +# CHECK-INST: vmini.d $vr13, $vr2, -14 +# CHECK-ENCODING: encoding: [0x4d,0xc8,0x93,0x72] + +vmin.bu $vr30, $vr13, $vr11 +# CHECK-INST: vmin.bu $vr30, $vr13, $vr11 +# CHECK-ENCODING: encoding: [0xbe,0x2d,0x76,0x70] + +vmin.hu $vr13, $vr10, $vr17 +# CHECK-INST: vmin.hu $vr13, $vr10, $vr17 +# CHECK-ENCODING: encoding: [0x4d,0xc5,0x76,0x70] + +vmin.wu $vr29, $vr10, $vr27 +# CHECK-INST: vmin.wu $vr29, $vr10, $vr27 +# CHECK-ENCODING: encoding: [0x5d,0x6d,0x77,0x70] + +vmin.du $vr8, $vr1, $vr16 +# CHECK-INST: vmin.du $vr8, $vr1, $vr16 +# 
CHECK-ENCODING: encoding: [0x28,0xc0,0x77,0x70] + +vmini.bu $vr16, $vr22, 4 +# CHECK-INST: vmini.bu $vr16, $vr22, 4 +# CHECK-ENCODING: encoding: [0xd0,0x12,0x96,0x72] + +vmini.hu $vr1, $vr24, 20 +# CHECK-INST: vmini.hu $vr1, $vr24, 20 +# CHECK-ENCODING: encoding: [0x01,0xd3,0x96,0x72] + +vmini.wu $vr15, $vr5, 9 +# CHECK-INST: vmini.wu $vr15, $vr5, 9 +# CHECK-ENCODING: encoding: [0xaf,0x24,0x97,0x72] + +vmini.du $vr31, $vr8, 25 +# CHECK-INST: vmini.du $vr31, $vr8, 25 +# CHECK-ENCODING: encoding: [0x1f,0xe5,0x97,0x72] diff --git a/llvm/test/MC/LoongArch/lsx/mod.s b/llvm/test/MC/LoongArch/lsx/mod.s new file mode 100644 index 0000000000000000000000000000000000000000..1033e8056d295dfeacec24c8d10518e48fbfffeb --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/mod.s @@ -0,0 +1,36 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vmod.b $vr28, $vr30, $vr25 +# CHECK-INST: vmod.b $vr28, $vr30, $vr25 +# CHECK-ENCODING: encoding: [0xdc,0x67,0xe2,0x70] + +vmod.h $vr18, $vr31, $vr26 +# CHECK-INST: vmod.h $vr18, $vr31, $vr26 +# CHECK-ENCODING: encoding: [0xf2,0xeb,0xe2,0x70] + +vmod.w $vr16, $vr20, $vr1 +# CHECK-INST: vmod.w $vr16, $vr20, $vr1 +# CHECK-ENCODING: encoding: [0x90,0x06,0xe3,0x70] + +vmod.d $vr26, $vr27, $vr13 +# CHECK-INST: vmod.d $vr26, $vr27, $vr13 +# CHECK-ENCODING: encoding: [0x7a,0xb7,0xe3,0x70] + +vmod.bu $vr19, $vr8, $vr11 +# CHECK-INST: vmod.bu $vr19, $vr8, $vr11 +# CHECK-ENCODING: encoding: [0x13,0x2d,0xe6,0x70] + +vmod.hu $vr14, $vr21, $vr9 +# CHECK-INST: vmod.hu $vr14, $vr21, $vr9 +# CHECK-ENCODING: encoding: [0xae,0xa6,0xe6,0x70] + +vmod.wu $vr19, $vr0, $vr5 +# CHECK-INST: vmod.wu $vr19, $vr0, $vr5 +# CHECK-ENCODING: encoding: [0x13,0x14,0xe7,0x70] + +vmod.du $vr12, $vr18, $vr31 +# CHECK-INST: vmod.du $vr12, $vr18, $vr31 +# CHECK-ENCODING: encoding: [0x4c,0xfe,0xe7,0x70] diff --git a/llvm/test/MC/LoongArch/lsx/mskgez.s b/llvm/test/MC/LoongArch/lsx/mskgez.s new file mode 100644 index 0000000000000000000000000000000000000000..0112d06e8e258c71358a310dc0db6d24b2afc2ba --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/mskgez.s @@ -0,0 +1,8 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vmskgez.b $vr13, $vr0 +# CHECK-INST: vmskgez.b $vr13, $vr0 +# CHECK-ENCODING: encoding: [0x0d,0x50,0x9c,0x72] diff --git a/llvm/test/MC/LoongArch/lsx/mskltz.s b/llvm/test/MC/LoongArch/lsx/mskltz.s new file mode 100644 index 0000000000000000000000000000000000000000..8f68faad1bf5f979aa5a7dacbc0d987328c6f62c --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/mskltz.s @@ -0,0 +1,20 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vmskltz.b $vr17, $vr20 +# CHECK-INST: vmskltz.b $vr17, $vr20 +# CHECK-ENCODING: encoding: [0x91,0x42,0x9c,0x72] + +vmskltz.h $vr23, $vr1 +# CHECK-INST: vmskltz.h $vr23, $vr1 +# CHECK-ENCODING: encoding: [0x37,0x44,0x9c,0x72] + +vmskltz.w $vr3, $vr16 +# CHECK-INST: vmskltz.w $vr3, $vr16 +# CHECK-ENCODING: encoding: [0x03,0x4a,0x9c,0x72] + +vmskltz.d $vr1, $vr26 +# CHECK-INST: 
vmskltz.d $vr1, $vr26 +# CHECK-ENCODING: encoding: [0x41,0x4f,0x9c,0x72] diff --git a/llvm/test/MC/LoongArch/lsx/msknz.s b/llvm/test/MC/LoongArch/lsx/msknz.s new file mode 100644 index 0000000000000000000000000000000000000000..3805e8830d3984aa1fa0e54fb6a402b91492e478 --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/msknz.s @@ -0,0 +1,8 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vmsknz.b $vr20, $vr21 +# CHECK-INST: vmsknz.b $vr20, $vr21 +# CHECK-ENCODING: encoding: [0xb4,0x62,0x9c,0x72] diff --git a/llvm/test/MC/LoongArch/lsx/msub.s b/llvm/test/MC/LoongArch/lsx/msub.s new file mode 100644 index 0000000000000000000000000000000000000000..f3e7e715334559c9680bc83608f3018e3baf4a3c --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/msub.s @@ -0,0 +1,20 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vmsub.b $vr19, $vr20, $vr12 +# CHECK-INST: vmsub.b $vr19, $vr20, $vr12 +# CHECK-ENCODING: encoding: [0x93,0x32,0xaa,0x70] + +vmsub.h $vr1, $vr9, $vr22 +# CHECK-INST: vmsub.h $vr1, $vr9, $vr22 +# CHECK-ENCODING: encoding: [0x21,0xd9,0xaa,0x70] + +vmsub.w $vr10, $vr2, $vr13 +# CHECK-INST: vmsub.w $vr10, $vr2, $vr13 +# CHECK-ENCODING: encoding: [0x4a,0x34,0xab,0x70] + +vmsub.d $vr28, $vr31, $vr6 +# CHECK-INST: vmsub.d $vr28, $vr31, $vr6 +# CHECK-ENCODING: encoding: [0xfc,0x9b,0xab,0x70] diff --git a/llvm/test/MC/LoongArch/lsx/muh.s b/llvm/test/MC/LoongArch/lsx/muh.s new file mode 100644 index 0000000000000000000000000000000000000000..b206480408bbe883ae871c3568d2d2f7d0f0b56f --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/muh.s @@ -0,0 +1,36 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vmuh.b $vr23, $vr18, $vr21 +# CHECK-INST: vmuh.b $vr23, $vr18, $vr21 +# CHECK-ENCODING: encoding: [0x57,0x56,0x86,0x70] + +vmuh.h $vr25, $vr18, $vr5 +# CHECK-INST: vmuh.h $vr25, $vr18, $vr5 +# CHECK-ENCODING: encoding: [0x59,0x96,0x86,0x70] + +vmuh.w $vr6, $vr9, $vr14 +# CHECK-INST: vmuh.w $vr6, $vr9, $vr14 +# CHECK-ENCODING: encoding: [0x26,0x39,0x87,0x70] + +vmuh.d $vr31, $vr21, $vr8 +# CHECK-INST: vmuh.d $vr31, $vr21, $vr8 +# CHECK-ENCODING: encoding: [0xbf,0xa2,0x87,0x70] + +vmuh.bu $vr11, $vr26, $vr7 +# CHECK-INST: vmuh.bu $vr11, $vr26, $vr7 +# CHECK-ENCODING: encoding: [0x4b,0x1f,0x88,0x70] + +vmuh.hu $vr27, $vr4, $vr28 +# CHECK-INST: vmuh.hu $vr27, $vr4, $vr28 +# CHECK-ENCODING: encoding: [0x9b,0xf0,0x88,0x70] + +vmuh.wu $vr28, $vr21, $vr28 +# CHECK-INST: vmuh.wu $vr28, $vr21, $vr28 +# CHECK-ENCODING: encoding: [0xbc,0x72,0x89,0x70] + +vmuh.du $vr25, $vr3, $vr4 +# CHECK-INST: vmuh.du $vr25, $vr3, $vr4 +# CHECK-ENCODING: encoding: [0x79,0x90,0x89,0x70] diff --git a/llvm/test/MC/LoongArch/lsx/mul.s b/llvm/test/MC/LoongArch/lsx/mul.s new file mode 100644 index 0000000000000000000000000000000000000000..7ddfc64cc7ea28fb6c3b5acc81b9e1c6d60567a0 --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/mul.s @@ -0,0 +1,20 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s 
--check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vmul.b $vr25, $vr30, $vr7 +# CHECK-INST: vmul.b $vr25, $vr30, $vr7 +# CHECK-ENCODING: encoding: [0xd9,0x1f,0x84,0x70] + +vmul.h $vr16, $vr1, $vr26 +# CHECK-INST: vmul.h $vr16, $vr1, $vr26 +# CHECK-ENCODING: encoding: [0x30,0xe8,0x84,0x70] + +vmul.w $vr24, $vr22, $vr29 +# CHECK-INST: vmul.w $vr24, $vr22, $vr29 +# CHECK-ENCODING: encoding: [0xd8,0x76,0x85,0x70] + +vmul.d $vr27, $vr16, $vr25 +# CHECK-INST: vmul.d $vr27, $vr16, $vr25 +# CHECK-ENCODING: encoding: [0x1b,0xe6,0x85,0x70] diff --git a/llvm/test/MC/LoongArch/lsx/mulw.s b/llvm/test/MC/LoongArch/lsx/mulw.s new file mode 100644 index 0000000000000000000000000000000000000000..9228e2e6f3339b2913072f29d7de33de35cfdf40 --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/mulw.s @@ -0,0 +1,100 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vmulwev.h.b $vr5, $vr6, $vr0 +# CHECK-INST: vmulwev.h.b $vr5, $vr6, $vr0 +# CHECK-ENCODING: encoding: [0xc5,0x00,0x90,0x70] + +vmulwev.w.h $vr4, $vr25, $vr2 +# CHECK-INST: vmulwev.w.h $vr4, $vr25, $vr2 +# CHECK-ENCODING: encoding: [0x24,0x8b,0x90,0x70] + +vmulwev.d.w $vr30, $vr28, $vr27 +# CHECK-INST: vmulwev.d.w $vr30, $vr28, $vr27 +# CHECK-ENCODING: encoding: [0x9e,0x6f,0x91,0x70] + +vmulwev.q.d $vr2, $vr1, $vr27 +# CHECK-INST: vmulwev.q.d $vr2, $vr1, $vr27 +# CHECK-ENCODING: encoding: [0x22,0xec,0x91,0x70] + +vmulwev.h.bu $vr10, $vr9, $vr4 +# CHECK-INST: vmulwev.h.bu $vr10, $vr9, $vr4 +# CHECK-ENCODING: encoding: [0x2a,0x11,0x98,0x70] + +vmulwev.w.hu $vr20, $vr31, $vr28 +# CHECK-INST: vmulwev.w.hu $vr20, $vr31, $vr28 +# CHECK-ENCODING: encoding: [0xf4,0xf3,0x98,0x70] + +vmulwev.d.wu $vr4, $vr6, $vr21 +# CHECK-INST: vmulwev.d.wu $vr4, $vr6, $vr21 +# CHECK-ENCODING: encoding: [0xc4,0x54,0x99,0x70] + +vmulwev.q.du $vr15, $vr21, $vr30 +# CHECK-INST: vmulwev.q.du $vr15, $vr21, $vr30 +# CHECK-ENCODING: encoding: [0xaf,0xfa,0x99,0x70] + +vmulwev.h.bu.b $vr29, $vr24, $vr15 +# CHECK-INST: vmulwev.h.bu.b $vr29, $vr24, $vr15 +# CHECK-ENCODING: encoding: [0x1d,0x3f,0xa0,0x70] + +vmulwev.w.hu.h $vr2, $vr28, $vr31 +# CHECK-INST: vmulwev.w.hu.h $vr2, $vr28, $vr31 +# CHECK-ENCODING: encoding: [0x82,0xff,0xa0,0x70] + +vmulwev.d.wu.w $vr12, $vr23, $vr6 +# CHECK-INST: vmulwev.d.wu.w $vr12, $vr23, $vr6 +# CHECK-ENCODING: encoding: [0xec,0x1a,0xa1,0x70] + +vmulwev.q.du.d $vr17, $vr9, $vr13 +# CHECK-INST: vmulwev.q.du.d $vr17, $vr9, $vr13 +# CHECK-ENCODING: encoding: [0x31,0xb5,0xa1,0x70] + +vmulwod.h.b $vr17, $vr0, $vr16 +# CHECK-INST: vmulwod.h.b $vr17, $vr0, $vr16 +# CHECK-ENCODING: encoding: [0x11,0x40,0x92,0x70] + +vmulwod.w.h $vr29, $vr5, $vr20 +# CHECK-INST: vmulwod.w.h $vr29, $vr5, $vr20 +# CHECK-ENCODING: encoding: [0xbd,0xd0,0x92,0x70] + +vmulwod.d.w $vr7, $vr26, $vr6 +# CHECK-INST: vmulwod.d.w $vr7, $vr26, $vr6 +# CHECK-ENCODING: encoding: [0x47,0x1b,0x93,0x70] + +vmulwod.q.d $vr13, $vr25, $vr30 +# CHECK-INST: vmulwod.q.d $vr13, $vr25, $vr30 +# CHECK-ENCODING: encoding: [0x2d,0xfb,0x93,0x70] + +vmulwod.h.bu $vr29, $vr20, $vr10 +# CHECK-INST: vmulwod.h.bu $vr29, $vr20, $vr10 +# CHECK-ENCODING: encoding: [0x9d,0x2a,0x9a,0x70] + +vmulwod.w.hu $vr31, $vr4, $vr25 +# CHECK-INST: vmulwod.w.hu $vr31, $vr4, $vr25 +# CHECK-ENCODING: encoding: 
[0x9f,0xe4,0x9a,0x70] + +vmulwod.d.wu $vr7, $vr26, $vr16 +# CHECK-INST: vmulwod.d.wu $vr7, $vr26, $vr16 +# CHECK-ENCODING: encoding: [0x47,0x43,0x9b,0x70] + +vmulwod.q.du $vr25, $vr10, $vr4 +# CHECK-INST: vmulwod.q.du $vr25, $vr10, $vr4 +# CHECK-ENCODING: encoding: [0x59,0x91,0x9b,0x70] + +vmulwod.h.bu.b $vr6, $vr25, $vr11 +# CHECK-INST: vmulwod.h.bu.b $vr6, $vr25, $vr11 +# CHECK-ENCODING: encoding: [0x26,0x2f,0xa2,0x70] + +vmulwod.w.hu.h $vr18, $vr25, $vr31 +# CHECK-INST: vmulwod.w.hu.h $vr18, $vr25, $vr31 +# CHECK-ENCODING: encoding: [0x32,0xff,0xa2,0x70] + +vmulwod.d.wu.w $vr10, $vr28, $vr26 +# CHECK-INST: vmulwod.d.wu.w $vr10, $vr28, $vr26 +# CHECK-ENCODING: encoding: [0x8a,0x6b,0xa3,0x70] + +vmulwod.q.du.d $vr30, $vr23, $vr17 +# CHECK-INST: vmulwod.q.du.d $vr30, $vr23, $vr17 +# CHECK-ENCODING: encoding: [0xfe,0xc6,0xa3,0x70] diff --git a/llvm/test/MC/LoongArch/lsx/neg.s b/llvm/test/MC/LoongArch/lsx/neg.s new file mode 100644 index 0000000000000000000000000000000000000000..34a6d975126318160a517dfa9a18f71d625cc972 --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/neg.s @@ -0,0 +1,20 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vneg.b $vr11, $vr29 +# CHECK-INST: vneg.b $vr11, $vr29 +# CHECK-ENCODING: encoding: [0xab,0x33,0x9c,0x72] + +vneg.h $vr14, $vr4 +# CHECK-INST: vneg.h $vr14, $vr4 +# CHECK-ENCODING: encoding: [0x8e,0x34,0x9c,0x72] + +vneg.w $vr4, $vr0 +# CHECK-INST: vneg.w $vr4, $vr0 +# CHECK-ENCODING: encoding: [0x04,0x38,0x9c,0x72] + +vneg.d $vr0, $vr5 +# CHECK-INST: vneg.d $vr0, $vr5 +# CHECK-ENCODING: encoding: [0xa0,0x3c,0x9c,0x72] diff --git a/llvm/test/MC/LoongArch/lsx/nor.s b/llvm/test/MC/LoongArch/lsx/nor.s new file mode 100644 index 0000000000000000000000000000000000000000..a74074f09f6c987cf7dc5b785b5570052e700984 --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/nor.s @@ -0,0 +1,8 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vnor.v $vr18, $vr5, $vr29 +# CHECK-INST: vnor.v $vr18, $vr5, $vr29 +# CHECK-ENCODING: encoding: [0xb2,0xf4,0x27,0x71] diff --git a/llvm/test/MC/LoongArch/lsx/nori.s b/llvm/test/MC/LoongArch/lsx/nori.s new file mode 100644 index 0000000000000000000000000000000000000000..7693568df0e87210c001457414b16fbdd867f35d --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/nori.s @@ -0,0 +1,8 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vnori.b $vr8, $vr16, 186 +# CHECK-INST: vnori.b $vr8, $vr16, 186 +# CHECK-ENCODING: encoding: [0x08,0xea,0xde,0x73] diff --git a/llvm/test/MC/LoongArch/lsx/or.s b/llvm/test/MC/LoongArch/lsx/or.s new file mode 100644 index 0000000000000000000000000000000000000000..c349eac2a9ce61cabd69e9fb38903f543c18b9aa --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/or.s @@ -0,0 +1,8 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + 
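+# vor.v performs a bitwise OR across the full 128-bit vector registers.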
+vor.v $vr9, $vr18, $vr24 +# CHECK-INST: vor.v $vr9, $vr18, $vr24 +# CHECK-ENCODING: encoding: [0x49,0xe2,0x26,0x71] diff --git a/llvm/test/MC/LoongArch/lsx/ori.s b/llvm/test/MC/LoongArch/lsx/ori.s new file mode 100644 index 0000000000000000000000000000000000000000..def8fbb9c91ed1dc80e41cb237a4f1459cb85a4c --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/ori.s @@ -0,0 +1,8 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vori.b $vr23, $vr3, 252 +# CHECK-INST: vori.b $vr23, $vr3, 252 +# CHECK-ENCODING: encoding: [0x77,0xf0,0xd7,0x73] diff --git a/llvm/test/MC/LoongArch/lsx/orn.s b/llvm/test/MC/LoongArch/lsx/orn.s new file mode 100644 index 0000000000000000000000000000000000000000..60864e9a30bee534e48f94ebdd344dcf65dc82a3 --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/orn.s @@ -0,0 +1,8 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vorn.v $vr11, $vr20, $vr17 +# CHECK-INST: vorn.v $vr11, $vr20, $vr17 +# CHECK-ENCODING: encoding: [0x8b,0xc6,0x28,0x71] diff --git a/llvm/test/MC/LoongArch/lsx/pack.s b/llvm/test/MC/LoongArch/lsx/pack.s new file mode 100644 index 0000000000000000000000000000000000000000..4d9b8b3f2a0d2eddd4d99692eb2d5adc17cf3024 --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/pack.s @@ -0,0 +1,36 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vpackev.b $vr1, $vr27, $vr16 +# CHECK-INST: vpackev.b $vr1, $vr27, $vr16 +# CHECK-ENCODING: encoding: [0x61,0x43,0x16,0x71] + +vpackev.h $vr0, $vr3, $vr25 +# CHECK-INST: vpackev.h $vr0, $vr3, $vr25 +# CHECK-ENCODING: encoding: [0x60,0xe4,0x16,0x71] + +vpackev.w $vr10, $vr4, $vr29 +# CHECK-INST: vpackev.w $vr10, $vr4, $vr29 +# CHECK-ENCODING: encoding: [0x8a,0x74,0x17,0x71] + +vpackev.d $vr28, $vr6, $vr7 +# CHECK-INST: vpackev.d $vr28, $vr6, $vr7 +# CHECK-ENCODING: encoding: [0xdc,0x9c,0x17,0x71] + +vpackod.b $vr14, $vr13, $vr7 +# CHECK-INST: vpackod.b $vr14, $vr13, $vr7 +# CHECK-ENCODING: encoding: [0xae,0x1d,0x18,0x71] + +vpackod.h $vr28, $vr5, $vr7 +# CHECK-INST: vpackod.h $vr28, $vr5, $vr7 +# CHECK-ENCODING: encoding: [0xbc,0x9c,0x18,0x71] + +vpackod.w $vr15, $vr11, $vr17 +# CHECK-INST: vpackod.w $vr15, $vr11, $vr17 +# CHECK-ENCODING: encoding: [0x6f,0x45,0x19,0x71] + +vpackod.d $vr12, $vr15, $vr0 +# CHECK-INST: vpackod.d $vr12, $vr15, $vr0 +# CHECK-ENCODING: encoding: [0xec,0x81,0x19,0x71] diff --git a/llvm/test/MC/LoongArch/lsx/pcnt.s b/llvm/test/MC/LoongArch/lsx/pcnt.s new file mode 100644 index 0000000000000000000000000000000000000000..ee896aee8067b91a659e3f58286d5c02ba4656bc --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/pcnt.s @@ -0,0 +1,20 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vpcnt.b $vr2, $vr7 +# CHECK-INST: vpcnt.b $vr2, $vr7 +# CHECK-ENCODING: encoding: [0xe2,0x20,0x9c,0x72] + +vpcnt.h $vr23, $vr25 +# 
CHECK-INST: vpcnt.h $vr23, $vr25 +# CHECK-ENCODING: encoding: [0x37,0x27,0x9c,0x72] + +vpcnt.w $vr17, $vr24 +# CHECK-INST: vpcnt.w $vr17, $vr24 +# CHECK-ENCODING: encoding: [0x11,0x2b,0x9c,0x72] + +vpcnt.d $vr4, $vr13 +# CHECK-INST: vpcnt.d $vr4, $vr13 +# CHECK-ENCODING: encoding: [0xa4,0x2d,0x9c,0x72] diff --git a/llvm/test/MC/LoongArch/lsx/permi.s b/llvm/test/MC/LoongArch/lsx/permi.s new file mode 100644 index 0000000000000000000000000000000000000000..3b4e1a55449b8a680a4cfde9f508f89d2a19c5d0 --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/permi.s @@ -0,0 +1,8 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vpermi.w $vr2, $vr22, 219 +# CHECK-INST: vpermi.w $vr2, $vr22, 219 +# CHECK-ENCODING: encoding: [0xc2,0x6e,0xe7,0x73] diff --git a/llvm/test/MC/LoongArch/lsx/pick.s b/llvm/test/MC/LoongArch/lsx/pick.s new file mode 100644 index 0000000000000000000000000000000000000000..f54c6226f5a082c23f0993ca9b09a61d756e038c --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/pick.s @@ -0,0 +1,36 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vpickev.b $vr8, $vr13, $vr23 +# CHECK-INST: vpickev.b $vr8, $vr13, $vr23 +# CHECK-ENCODING: encoding: [0xa8,0x5d,0x1e,0x71] + +vpickev.h $vr11, $vr18, $vr19 +# CHECK-INST: vpickev.h $vr11, $vr18, $vr19 +# CHECK-ENCODING: encoding: [0x4b,0xce,0x1e,0x71] + +vpickev.w $vr16, $vr31, $vr30 +# CHECK-INST: vpickev.w $vr16, $vr31, $vr30 +# CHECK-ENCODING: encoding: [0xf0,0x7b,0x1f,0x71] + +vpickev.d $vr1, $vr28, $vr8 +# CHECK-INST: vpickev.d $vr1, $vr28, $vr8 +# CHECK-ENCODING: encoding: [0x81,0xa3,0x1f,0x71] + +vpickod.b $vr29, $vr28, $vr28 +# CHECK-INST: vpickod.b $vr29, $vr28, $vr28 +# CHECK-ENCODING: encoding: [0x9d,0x73,0x20,0x71] + +vpickod.h $vr5, $vr5, $vr1 +# CHECK-INST: vpickod.h $vr5, $vr5, $vr1 +# CHECK-ENCODING: encoding: [0xa5,0x84,0x20,0x71] + +vpickod.w $vr18, $vr8, $vr22 +# CHECK-INST: vpickod.w $vr18, $vr8, $vr22 +# CHECK-ENCODING: encoding: [0x12,0x59,0x21,0x71] + +vpickod.d $vr5, $vr5, $vr22 +# CHECK-INST: vpickod.d $vr5, $vr5, $vr22 +# CHECK-ENCODING: encoding: [0xa5,0xd8,0x21,0x71] diff --git a/llvm/test/MC/LoongArch/lsx/pickve2gr.s b/llvm/test/MC/LoongArch/lsx/pickve2gr.s new file mode 100644 index 0000000000000000000000000000000000000000..7a28e8104f028cb7afa078ee9de2fe6bfdbfe521 --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/pickve2gr.s @@ -0,0 +1,36 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vpickve2gr.b $r18, $vr1, 1 +# CHECK-INST: vpickve2gr.b $t6, $vr1, 1 +# CHECK-ENCODING: encoding: [0x32,0x84,0xef,0x72] + +vpickve2gr.h $r2, $vr5, 3 +# CHECK-INST: vpickve2gr.h $tp, $vr5, 3 +# CHECK-ENCODING: encoding: [0xa2,0xcc,0xef,0x72] + +vpickve2gr.w $r3, $vr11, 2 +# CHECK-INST: vpickve2gr.w $sp, $vr11, 2 +# CHECK-ENCODING: encoding: [0x63,0xe9,0xef,0x72] + +vpickve2gr.d $r26, $vr1, 1 +# CHECK-INST: vpickve2gr.d $s3, $vr1, 1 +# CHECK-ENCODING: encoding: [0x3a,0xf4,0xef,0x72] + +vpickve2gr.bu $r28, $vr14, 6 +# CHECK-INST: vpickve2gr.bu $s5, 
$vr14, 6 +# CHECK-ENCODING: encoding: [0xdc,0x99,0xf3,0x72] + +vpickve2gr.hu $r7, $vr6, 7 +# CHECK-INST: vpickve2gr.hu $a3, $vr6, 7 +# CHECK-ENCODING: encoding: [0xc7,0xdc,0xf3,0x72] + +vpickve2gr.wu $r11, $vr30, 1 +# CHECK-INST: vpickve2gr.wu $a7, $vr30, 1 +# CHECK-ENCODING: encoding: [0xcb,0xe7,0xf3,0x72] + +vpickve2gr.du $r13, $vr5, 0 +# CHECK-INST: vpickve2gr.du $t1, $vr5, 0 +# CHECK-ENCODING: encoding: [0xad,0xf0,0xf3,0x72] diff --git a/llvm/test/MC/LoongArch/lsx/replgr2vr.s b/llvm/test/MC/LoongArch/lsx/replgr2vr.s new file mode 100644 index 0000000000000000000000000000000000000000..5e5c048862fe34a7ab1ef93295f54890eb59dd61 --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/replgr2vr.s @@ -0,0 +1,20 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vreplgr2vr.b $vr30, $r27 +# CHECK-INST: vreplgr2vr.b $vr30, $s4 +# CHECK-ENCODING: encoding: [0x7e,0x03,0x9f,0x72] + +vreplgr2vr.h $vr6, $r1 +# CHECK-INST: vreplgr2vr.h $vr6, $ra +# CHECK-ENCODING: encoding: [0x26,0x04,0x9f,0x72] + +vreplgr2vr.w $vr23, $r9 +# CHECK-INST: vreplgr2vr.w $vr23, $a5 +# CHECK-ENCODING: encoding: [0x37,0x09,0x9f,0x72] + +vreplgr2vr.d $vr17, $r14 +# CHECK-INST: vreplgr2vr.d $vr17, $t2 +# CHECK-ENCODING: encoding: [0xd1,0x0d,0x9f,0x72] diff --git a/llvm/test/MC/LoongArch/lsx/replve.s b/llvm/test/MC/LoongArch/lsx/replve.s new file mode 100644 index 0000000000000000000000000000000000000000..b9943acb361ae8177ce4b9208a11f84dad0f0b23 --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/replve.s @@ -0,0 +1,20 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vreplve.b $vr10, $vr31, $r20 +# CHECK-INST: vreplve.b $vr10, $vr31, $t8 +# CHECK-ENCODING: encoding: [0xea,0x53,0x22,0x71] + +vreplve.h $vr8, $vr3, $r30 +# CHECK-INST: vreplve.h $vr8, $vr3, $s7 +# CHECK-ENCODING: encoding: [0x68,0xf8,0x22,0x71] + +vreplve.w $vr5, $vr1, $r20 +# CHECK-INST: vreplve.w $vr5, $vr1, $t8 +# CHECK-ENCODING: encoding: [0x25,0x50,0x23,0x71] + +vreplve.d $vr11, $vr15, $r30 +# CHECK-INST: vreplve.d $vr11, $vr15, $s7 +# CHECK-ENCODING: encoding: [0xeb,0xf9,0x23,0x71] diff --git a/llvm/test/MC/LoongArch/lsx/replvei.s b/llvm/test/MC/LoongArch/lsx/replvei.s new file mode 100644 index 0000000000000000000000000000000000000000..dd1ce0f96aeb1c4de4a90bc0af04452cb069eff7 --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/replvei.s @@ -0,0 +1,20 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vreplvei.b $vr23, $vr3, 3 +# CHECK-INST: vreplvei.b $vr23, $vr3, 3 +# CHECK-ENCODING: encoding: [0x77,0x8c,0xf7,0x72] + +vreplvei.h $vr27, $vr16, 0 +# CHECK-INST: vreplvei.h $vr27, $vr16, 0 +# CHECK-ENCODING: encoding: [0x1b,0xc2,0xf7,0x72] + +vreplvei.w $vr18, $vr23, 3 +# CHECK-INST: vreplvei.w $vr18, $vr23, 3 +# CHECK-ENCODING: encoding: [0xf2,0xee,0xf7,0x72] + +vreplvei.d $vr15, $vr12, 1 +# CHECK-INST: vreplvei.d $vr15, $vr12, 1 +# CHECK-ENCODING: encoding: [0x8f,0xf5,0xf7,0x72] diff --git a/llvm/test/MC/LoongArch/lsx/rotr.s b/llvm/test/MC/LoongArch/lsx/rotr.s new file 
mode 100644 index 0000000000000000000000000000000000000000..101405e5b6b86d9c829b36591dd112d1b1db4ef2 --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/rotr.s @@ -0,0 +1,36 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vrotr.b $vr15, $vr25, $vr30 +# CHECK-INST: vrotr.b $vr15, $vr25, $vr30 +# CHECK-ENCODING: encoding: [0x2f,0x7b,0xee,0x70] + +vrotr.h $vr5, $vr23, $vr14 +# CHECK-INST: vrotr.h $vr5, $vr23, $vr14 +# CHECK-ENCODING: encoding: [0xe5,0xba,0xee,0x70] + +vrotr.w $vr27, $vr0, $vr7 +# CHECK-INST: vrotr.w $vr27, $vr0, $vr7 +# CHECK-ENCODING: encoding: [0x1b,0x1c,0xef,0x70] + +vrotr.d $vr2, $vr3, $vr21 +# CHECK-INST: vrotr.d $vr2, $vr3, $vr21 +# CHECK-ENCODING: encoding: [0x62,0xd4,0xef,0x70] + +vrotri.b $vr17, $vr22, 5 +# CHECK-INST: vrotri.b $vr17, $vr22, 5 +# CHECK-ENCODING: encoding: [0xd1,0x36,0xa0,0x72] + +vrotri.h $vr27, $vr20, 10 +# CHECK-INST: vrotri.h $vr27, $vr20, 10 +# CHECK-ENCODING: encoding: [0x9b,0x6a,0xa0,0x72] + +vrotri.w $vr21, $vr24, 14 +# CHECK-INST: vrotri.w $vr21, $vr24, 14 +# CHECK-ENCODING: encoding: [0x15,0xbb,0xa0,0x72] + +vrotri.d $vr25, $vr23, 14 +# CHECK-INST: vrotri.d $vr25, $vr23, 14 +# CHECK-ENCODING: encoding: [0xf9,0x3a,0xa1,0x72] diff --git a/llvm/test/MC/LoongArch/lsx/sadd.s b/llvm/test/MC/LoongArch/lsx/sadd.s new file mode 100644 index 0000000000000000000000000000000000000000..9709a8a8e75f2cbbcd27111c598d580f762704e3 --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/sadd.s @@ -0,0 +1,36 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vsadd.b $vr29, $vr30, $vr11 +# CHECK-INST: vsadd.b $vr29, $vr30, $vr11 +# CHECK-ENCODING: encoding: [0xdd,0x2f,0x46,0x70] + +vsadd.h $vr1, $vr2, $vr29 +# CHECK-INST: vsadd.h $vr1, $vr2, $vr29 +# CHECK-ENCODING: encoding: [0x41,0xf4,0x46,0x70] + +vsadd.w $vr19, $vr28, $vr28 +# CHECK-INST: vsadd.w $vr19, $vr28, $vr28 +# CHECK-ENCODING: encoding: [0x93,0x73,0x47,0x70] + +vsadd.d $vr19, $vr30, $vr20 +# CHECK-INST: vsadd.d $vr19, $vr30, $vr20 +# CHECK-ENCODING: encoding: [0xd3,0xd3,0x47,0x70] + +vsadd.bu $vr22, $vr22, $vr16 +# CHECK-INST: vsadd.bu $vr22, $vr22, $vr16 +# CHECK-ENCODING: encoding: [0xd6,0x42,0x4a,0x70] + +vsadd.hu $vr0, $vr16, $vr8 +# CHECK-INST: vsadd.hu $vr0, $vr16, $vr8 +# CHECK-ENCODING: encoding: [0x00,0xa2,0x4a,0x70] + +vsadd.wu $vr9, $vr23, $vr24 +# CHECK-INST: vsadd.wu $vr9, $vr23, $vr24 +# CHECK-ENCODING: encoding: [0xe9,0x62,0x4b,0x70] + +vsadd.du $vr28, $vr11, $vr30 +# CHECK-INST: vsadd.du $vr28, $vr11, $vr30 +# CHECK-ENCODING: encoding: [0x7c,0xf9,0x4b,0x70] diff --git a/llvm/test/MC/LoongArch/lsx/sat.s b/llvm/test/MC/LoongArch/lsx/sat.s new file mode 100644 index 0000000000000000000000000000000000000000..677e7fc5ab53474ecc80752d7c46006954f35b9b --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/sat.s @@ -0,0 +1,36 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vsat.b $vr29, $vr0, 1 +# CHECK-INST: vsat.b $vr29, $vr0, 1 +# CHECK-ENCODING: encoding: [0x1d,0x24,0x24,0x73] + +vsat.h $vr4, $vr13, 
13 +# CHECK-INST: vsat.h $vr4, $vr13, 13 +# CHECK-ENCODING: encoding: [0xa4,0x75,0x24,0x73] + +vsat.w $vr6, $vr29, 19 +# CHECK-INST: vsat.w $vr6, $vr29, 19 +# CHECK-ENCODING: encoding: [0xa6,0xcf,0x24,0x73] + +vsat.d $vr22, $vr6, 54 +# CHECK-INST: vsat.d $vr22, $vr6, 54 +# CHECK-ENCODING: encoding: [0xd6,0xd8,0x25,0x73] + +vsat.bu $vr17, $vr8, 6 +# CHECK-INST: vsat.bu $vr17, $vr8, 6 +# CHECK-ENCODING: encoding: [0x11,0x39,0x28,0x73] + +vsat.hu $vr2, $vr14, 2 +# CHECK-INST: vsat.hu $vr2, $vr14, 2 +# CHECK-ENCODING: encoding: [0xc2,0x49,0x28,0x73] + +vsat.wu $vr1, $vr28, 19 +# CHECK-INST: vsat.wu $vr1, $vr28, 19 +# CHECK-ENCODING: encoding: [0x81,0xcf,0x28,0x73] + +vsat.du $vr25, $vr6, 59 +# CHECK-INST: vsat.du $vr25, $vr6, 59 +# CHECK-ENCODING: encoding: [0xd9,0xec,0x29,0x73] diff --git a/llvm/test/MC/LoongArch/lsx/seq.s b/llvm/test/MC/LoongArch/lsx/seq.s new file mode 100644 index 0000000000000000000000000000000000000000..764c94ef6f1a1200c32e0a677134b27f64a8581d --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/seq.s @@ -0,0 +1,36 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vseq.b $vr15, $vr30, $vr24 +# CHECK-INST: vseq.b $vr15, $vr30, $vr24 +# CHECK-ENCODING: encoding: [0xcf,0x63,0x00,0x70] + +vseq.h $vr7, $vr4, $vr22 +# CHECK-INST: vseq.h $vr7, $vr4, $vr22 +# CHECK-ENCODING: encoding: [0x87,0xd8,0x00,0x70] + +vseq.w $vr4, $vr15, $vr28 +# CHECK-INST: vseq.w $vr4, $vr15, $vr28 +# CHECK-ENCODING: encoding: [0xe4,0x71,0x01,0x70] + +vseq.d $vr29, $vr26, $vr22 +# CHECK-INST: vseq.d $vr29, $vr26, $vr22 +# CHECK-ENCODING: encoding: [0x5d,0xdb,0x01,0x70] + +vseqi.b $vr19, $vr30, 14 +# CHECK-INST: vseqi.b $vr19, $vr30, 14 +# CHECK-ENCODING: encoding: [0xd3,0x3b,0x80,0x72] + +vseqi.h $vr15, $vr2, 15 +# CHECK-INST: vseqi.h $vr15, $vr2, 15 +# CHECK-ENCODING: encoding: [0x4f,0xbc,0x80,0x72] + +vseqi.w $vr27, $vr23, -10 +# CHECK-INST: vseqi.w $vr27, $vr23, -10 +# CHECK-ENCODING: encoding: [0xfb,0x5a,0x81,0x72] + +vseqi.d $vr6, $vr12, -2 +# CHECK-INST: vseqi.d $vr6, $vr12, -2 +# CHECK-ENCODING: encoding: [0x86,0xf9,0x81,0x72] diff --git a/llvm/test/MC/LoongArch/lsx/set.s b/llvm/test/MC/LoongArch/lsx/set.s new file mode 100644 index 0000000000000000000000000000000000000000..bd2cfb57a4e8f056fb152c852acae1b34a5a3e64 --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/set.s @@ -0,0 +1,12 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vseteqz.v $fcc0, $vr13 +# CHECK-INST: vseteqz.v $fcc0, $vr13 +# CHECK-ENCODING: encoding: [0xa0,0x99,0x9c,0x72] + +vsetnez.v $fcc7, $vr14 +# CHECK-INST: vsetnez.v $fcc7, $vr14 +# CHECK-ENCODING: encoding: [0xc7,0x9d,0x9c,0x72] diff --git a/llvm/test/MC/LoongArch/lsx/setallnez.s b/llvm/test/MC/LoongArch/lsx/setallnez.s new file mode 100644 index 0000000000000000000000000000000000000000..8ca6f14973145fd48183ccf5c4072cb94b33b736 --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/setallnez.s @@ -0,0 +1,20 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vsetallnez.b $fcc2, $vr8 
+# CHECK-INST: vsetallnez.b $fcc2, $vr8 +# CHECK-ENCODING: encoding: [0x02,0xb1,0x9c,0x72] + +vsetallnez.h $fcc0, $vr26 +# CHECK-INST: vsetallnez.h $fcc0, $vr26 +# CHECK-ENCODING: encoding: [0x40,0xb7,0x9c,0x72] + +vsetallnez.w $fcc6, $vr17 +# CHECK-INST: vsetallnez.w $fcc6, $vr17 +# CHECK-ENCODING: encoding: [0x26,0xba,0x9c,0x72] + +vsetallnez.d $fcc0, $vr27 +# CHECK-INST: vsetallnez.d $fcc0, $vr27 +# CHECK-ENCODING: encoding: [0x60,0xbf,0x9c,0x72] diff --git a/llvm/test/MC/LoongArch/lsx/setanyeqz.s b/llvm/test/MC/LoongArch/lsx/setanyeqz.s new file mode 100644 index 0000000000000000000000000000000000000000..6dbd4f17048f5f3ae7f373cd139560a79915fb05 --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/setanyeqz.s @@ -0,0 +1,20 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vsetanyeqz.b $fcc3, $vr4 +# CHECK-INST: vsetanyeqz.b $fcc3, $vr4 +# CHECK-ENCODING: encoding: [0x83,0xa0,0x9c,0x72] + +vsetanyeqz.h $fcc2, $vr15 +# CHECK-INST: vsetanyeqz.h $fcc2, $vr15 +# CHECK-ENCODING: encoding: [0xe2,0xa5,0x9c,0x72] + +vsetanyeqz.w $fcc4, $vr0 +# CHECK-INST: vsetanyeqz.w $fcc4, $vr0 +# CHECK-ENCODING: encoding: [0x04,0xa8,0x9c,0x72] + +vsetanyeqz.d $fcc3, $vr7 +# CHECK-INST: vsetanyeqz.d $fcc3, $vr7 +# CHECK-ENCODING: encoding: [0xe3,0xac,0x9c,0x72] diff --git a/llvm/test/MC/LoongArch/lsx/shuf.s b/llvm/test/MC/LoongArch/lsx/shuf.s new file mode 100644 index 0000000000000000000000000000000000000000..0e73aba22e98a478b667d3494e7c33e62e1d94f0 --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/shuf.s @@ -0,0 +1,20 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vshuf.b $vr27, $vr17, $vr31, $vr28 +# CHECK-INST: vshuf.b $vr27, $vr17, $vr31, $vr28 +# CHECK-ENCODING: encoding: [0x3b,0x7e,0x5e,0x0d] + +vshuf.h $vr21, $vr10, $vr31 +# CHECK-INST: vshuf.h $vr21, $vr10, $vr31 +# CHECK-ENCODING: encoding: [0x55,0xfd,0x7a,0x71] + +vshuf.w $vr18, $vr17, $vr23 +# CHECK-INST: vshuf.w $vr18, $vr17, $vr23 +# CHECK-ENCODING: encoding: [0x32,0x5e,0x7b,0x71] + +vshuf.d $vr4, $vr24, $vr11 +# CHECK-INST: vshuf.d $vr4, $vr24, $vr11 +# CHECK-ENCODING: encoding: [0x04,0xaf,0x7b,0x71] diff --git a/llvm/test/MC/LoongArch/lsx/shuf4i.s b/llvm/test/MC/LoongArch/lsx/shuf4i.s new file mode 100644 index 0000000000000000000000000000000000000000..d22e2956c77b34021dd6a76091fb77904a89d7b0 --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/shuf4i.s @@ -0,0 +1,20 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vshuf4i.b $vr30, $vr14, 72 +# CHECK-INST: vshuf4i.b $vr30, $vr14, 72 +# CHECK-ENCODING: encoding: [0xde,0x21,0x91,0x73] + +vshuf4i.h $vr13, $vr4, 222 +# CHECK-INST: vshuf4i.h $vr13, $vr4, 222 +# CHECK-ENCODING: encoding: [0x8d,0x78,0x97,0x73] + +vshuf4i.w $vr17, $vr8, 74 +# CHECK-INST: vshuf4i.w $vr17, $vr8, 74 +# CHECK-ENCODING: encoding: [0x11,0x29,0x99,0x73] + +vshuf4i.d $vr11, $vr6, 157 +# CHECK-INST: vshuf4i.d $vr11, $vr6, 157 +# CHECK-ENCODING: encoding: [0xcb,0x74,0x9e,0x73] diff --git a/llvm/test/MC/LoongArch/lsx/signcov.s 
b/llvm/test/MC/LoongArch/lsx/signcov.s new file mode 100644 index 0000000000000000000000000000000000000000..343a46f3ef0fe1656d4012705e15bff57797d68e --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/signcov.s @@ -0,0 +1,20 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vsigncov.b $vr11, $vr3, $vr7 +# CHECK-INST: vsigncov.b $vr11, $vr3, $vr7 +# CHECK-ENCODING: encoding: [0x6b,0x1c,0x2e,0x71] + +vsigncov.h $vr8, $vr29, $vr1 +# CHECK-INST: vsigncov.h $vr8, $vr29, $vr1 +# CHECK-ENCODING: encoding: [0xa8,0x87,0x2e,0x71] + +vsigncov.w $vr28, $vr13, $vr21 +# CHECK-INST: vsigncov.w $vr28, $vr13, $vr21 +# CHECK-ENCODING: encoding: [0xbc,0x55,0x2f,0x71] + +vsigncov.d $vr22, $vr20, $vr0 +# CHECK-INST: vsigncov.d $vr22, $vr20, $vr0 +# CHECK-ENCODING: encoding: [0x96,0x82,0x2f,0x71] diff --git a/llvm/test/MC/LoongArch/lsx/sle.s b/llvm/test/MC/LoongArch/lsx/sle.s new file mode 100644 index 0000000000000000000000000000000000000000..cf86e632947f7d701d8167ffc04ecfe1c48b18ae --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/sle.s @@ -0,0 +1,68 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vsle.b $vr4, $vr30, $vr18 +# CHECK-INST: vsle.b $vr4, $vr30, $vr18 +# CHECK-ENCODING: encoding: [0xc4,0x4b,0x02,0x70] + +vsle.h $vr3, $vr13, $vr12 +# CHECK-INST: vsle.h $vr3, $vr13, $vr12 +# CHECK-ENCODING: encoding: [0xa3,0xb1,0x02,0x70] + +vsle.w $vr21, $vr17, $vr20 +# CHECK-INST: vsle.w $vr21, $vr17, $vr20 +# CHECK-ENCODING: encoding: [0x35,0x52,0x03,0x70] + +vsle.d $vr22, $vr0, $vr28 +# CHECK-INST: vsle.d $vr22, $vr0, $vr28 +# CHECK-ENCODING: encoding: [0x16,0xf0,0x03,0x70] + +vslei.b $vr8, $vr11, 4 +# CHECK-INST: vslei.b $vr8, $vr11, 4 +# CHECK-ENCODING: encoding: [0x68,0x11,0x82,0x72] + +vslei.h $vr15, $vr22, 0 +# CHECK-INST: vslei.h $vr15, $vr22, 0 +# CHECK-ENCODING: encoding: [0xcf,0x82,0x82,0x72] + +vslei.w $vr23, $vr17, 12 +# CHECK-INST: vslei.w $vr23, $vr17, 12 +# CHECK-ENCODING: encoding: [0x37,0x32,0x83,0x72] + +vslei.d $vr11, $vr18, -12 +# CHECK-INST: vslei.d $vr11, $vr18, -12 +# CHECK-ENCODING: encoding: [0x4b,0xd2,0x83,0x72] + +vsle.bu $vr20, $vr11, $vr31 +# CHECK-INST: vsle.bu $vr20, $vr11, $vr31 +# CHECK-ENCODING: encoding: [0x74,0x7d,0x04,0x70] + +vsle.hu $vr5, $vr6, $vr7 +# CHECK-INST: vsle.hu $vr5, $vr6, $vr7 +# CHECK-ENCODING: encoding: [0xc5,0x9c,0x04,0x70] + +vsle.wu $vr15, $vr14, $vr22 +# CHECK-INST: vsle.wu $vr15, $vr14, $vr22 +# CHECK-ENCODING: encoding: [0xcf,0x59,0x05,0x70] + +vsle.du $vr0, $vr29, $vr17 +# CHECK-INST: vsle.du $vr0, $vr29, $vr17 +# CHECK-ENCODING: encoding: [0xa0,0xc7,0x05,0x70] + +vslei.bu $vr12, $vr27, 12 +# CHECK-INST: vslei.bu $vr12, $vr27, 12 +# CHECK-ENCODING: encoding: [0x6c,0x33,0x84,0x72] + +vslei.hu $vr22, $vr31, 12 +# CHECK-INST: vslei.hu $vr22, $vr31, 12 +# CHECK-ENCODING: encoding: [0xf6,0xb3,0x84,0x72] + +vslei.wu $vr19, $vr18, 21 +# CHECK-INST: vslei.wu $vr19, $vr18, 21 +# CHECK-ENCODING: encoding: [0x53,0x56,0x85,0x72] + +vslei.du $vr19, $vr14, 26 +# CHECK-INST: vslei.du $vr19, $vr14, 26 +# CHECK-ENCODING: encoding: [0xd3,0xe9,0x85,0x72] diff --git a/llvm/test/MC/LoongArch/lsx/sll.s b/llvm/test/MC/LoongArch/lsx/sll.s new file mode 100644 index 
0000000000000000000000000000000000000000..e443abeef59ffe099d428275eb6fb85d329a5d56 --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/sll.s @@ -0,0 +1,36 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vsll.b $vr31, $vr13, $vr5 +# CHECK-INST: vsll.b $vr31, $vr13, $vr5 +# CHECK-ENCODING: encoding: [0xbf,0x15,0xe8,0x70] + +vsll.h $vr31, $vr1, $vr4 +# CHECK-INST: vsll.h $vr31, $vr1, $vr4 +# CHECK-ENCODING: encoding: [0x3f,0x90,0xe8,0x70] + +vsll.w $vr8, $vr19, $vr19 +# CHECK-INST: vsll.w $vr8, $vr19, $vr19 +# CHECK-ENCODING: encoding: [0x68,0x4e,0xe9,0x70] + +vsll.d $vr6, $vr25, $vr6 +# CHECK-INST: vsll.d $vr6, $vr25, $vr6 +# CHECK-ENCODING: encoding: [0x26,0x9b,0xe9,0x70] + +vslli.b $vr6, $vr7, 2 +# CHECK-INST: vslli.b $vr6, $vr7, 2 +# CHECK-ENCODING: encoding: [0xe6,0x28,0x2c,0x73] + +vslli.h $vr6, $vr4, 10 +# CHECK-INST: vslli.h $vr6, $vr4, 10 +# CHECK-ENCODING: encoding: [0x86,0x68,0x2c,0x73] + +vslli.w $vr3, $vr13, 17 +# CHECK-INST: vslli.w $vr3, $vr13, 17 +# CHECK-ENCODING: encoding: [0xa3,0xc5,0x2c,0x73] + +vslli.d $vr24, $vr11, 38 +# CHECK-INST: vslli.d $vr24, $vr11, 38 +# CHECK-ENCODING: encoding: [0x78,0x99,0x2d,0x73] diff --git a/llvm/test/MC/LoongArch/lsx/sllwil.s b/llvm/test/MC/LoongArch/lsx/sllwil.s new file mode 100644 index 0000000000000000000000000000000000000000..3aec8d63ab2426d2af52cf44c649b19745a396e6 --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/sllwil.s @@ -0,0 +1,28 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vsllwil.h.b $vr7, $vr6, 3 +# CHECK-INST: vsllwil.h.b $vr7, $vr6, 3 +# CHECK-ENCODING: encoding: [0xc7,0x2c,0x08,0x73] + +vsllwil.w.h $vr6, $vr5, 8 +# CHECK-INST: vsllwil.w.h $vr6, $vr5, 8 +# CHECK-ENCODING: encoding: [0xa6,0x60,0x08,0x73] + +vsllwil.d.w $vr15, $vr1, 22 +# CHECK-INST: vsllwil.d.w $vr15, $vr1, 22 +# CHECK-ENCODING: encoding: [0x2f,0xd8,0x08,0x73] + +vsllwil.hu.bu $vr13, $vr4, 4 +# CHECK-INST: vsllwil.hu.bu $vr13, $vr4, 4 +# CHECK-ENCODING: encoding: [0x8d,0x30,0x0c,0x73] + +vsllwil.wu.hu $vr1, $vr4, 3 +# CHECK-INST: vsllwil.wu.hu $vr1, $vr4, 3 +# CHECK-ENCODING: encoding: [0x81,0x4c,0x0c,0x73] + +vsllwil.du.wu $vr18, $vr29, 25 +# CHECK-INST: vsllwil.du.wu $vr18, $vr29, 25 +# CHECK-ENCODING: encoding: [0xb2,0xe7,0x0c,0x73] diff --git a/llvm/test/MC/LoongArch/lsx/slt.s b/llvm/test/MC/LoongArch/lsx/slt.s new file mode 100644 index 0000000000000000000000000000000000000000..86034324d3d599f2eaadbda480ed3242e77a03dc --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/slt.s @@ -0,0 +1,68 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vslt.b $vr24, $vr23, $vr26 +# CHECK-INST: vslt.b $vr24, $vr23, $vr26 +# CHECK-ENCODING: encoding: [0xf8,0x6a,0x06,0x70] + +vslt.h $vr23, $vr4, $vr6 +# CHECK-INST: vslt.h $vr23, $vr4, $vr6 +# CHECK-ENCODING: encoding: [0x97,0x98,0x06,0x70] + +vslt.w $vr30, $vr25, $vr1 +# CHECK-INST: vslt.w $vr30, $vr25, $vr1 +# CHECK-ENCODING: encoding: [0x3e,0x07,0x07,0x70] + +vslt.d $vr25, $vr22, $vr15 +# CHECK-INST: vslt.d $vr25, 
$vr22, $vr15 +# CHECK-ENCODING: encoding: [0xd9,0xbe,0x07,0x70] + +vslti.b $vr11, $vr12, -10 +# CHECK-INST: vslti.b $vr11, $vr12, -10 +# CHECK-ENCODING: encoding: [0x8b,0x59,0x86,0x72] + +vslti.h $vr20, $vr12, -8 +# CHECK-INST: vslti.h $vr20, $vr12, -8 +# CHECK-ENCODING: encoding: [0x94,0xe1,0x86,0x72] + +vslti.w $vr20, $vr27, 0 +# CHECK-INST: vslti.w $vr20, $vr27, 0 +# CHECK-ENCODING: encoding: [0x74,0x03,0x87,0x72] + +vslti.d $vr19, $vr18, 4 +# CHECK-INST: vslti.d $vr19, $vr18, 4 +# CHECK-ENCODING: encoding: [0x53,0x92,0x87,0x72] + +vslt.bu $vr5, $vr30, $vr28 +# CHECK-INST: vslt.bu $vr5, $vr30, $vr28 +# CHECK-ENCODING: encoding: [0xc5,0x73,0x08,0x70] + +vslt.hu $vr13, $vr28, $vr23 +# CHECK-INST: vslt.hu $vr13, $vr28, $vr23 +# CHECK-ENCODING: encoding: [0x8d,0xdf,0x08,0x70] + +vslt.wu $vr20, $vr28, $vr1 +# CHECK-INST: vslt.wu $vr20, $vr28, $vr1 +# CHECK-ENCODING: encoding: [0x94,0x07,0x09,0x70] + +vslt.du $vr6, $vr6, $vr5 +# CHECK-INST: vslt.du $vr6, $vr6, $vr5 +# CHECK-ENCODING: encoding: [0xc6,0x94,0x09,0x70] + +vslti.bu $vr9, $vr29, 23 +# CHECK-INST: vslti.bu $vr9, $vr29, 23 +# CHECK-ENCODING: encoding: [0xa9,0x5f,0x88,0x72] + +vslti.hu $vr28, $vr13, 6 +# CHECK-INST: vslti.hu $vr28, $vr13, 6 +# CHECK-ENCODING: encoding: [0xbc,0x99,0x88,0x72] + +vslti.wu $vr11, $vr9, 12 +# CHECK-INST: vslti.wu $vr11, $vr9, 12 +# CHECK-ENCODING: encoding: [0x2b,0x31,0x89,0x72] + +vslti.du $vr23, $vr30, 21 +# CHECK-INST: vslti.du $vr23, $vr30, 21 +# CHECK-ENCODING: encoding: [0xd7,0xd7,0x89,0x72] diff --git a/llvm/test/MC/LoongArch/lsx/sra.s b/llvm/test/MC/LoongArch/lsx/sra.s new file mode 100644 index 0000000000000000000000000000000000000000..3220a4159c53ea77c74806cb24df4f1bc24dc0d9 --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/sra.s @@ -0,0 +1,36 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vsra.b $vr30, $vr9, $vr11 +# CHECK-INST: vsra.b $vr30, $vr9, $vr11 +# CHECK-ENCODING: encoding: [0x3e,0x2d,0xec,0x70] + +vsra.h $vr20, $vr17, $vr26 +# CHECK-INST: vsra.h $vr20, $vr17, $vr26 +# CHECK-ENCODING: encoding: [0x34,0xea,0xec,0x70] + +vsra.w $vr12, $vr21, $vr15 +# CHECK-INST: vsra.w $vr12, $vr21, $vr15 +# CHECK-ENCODING: encoding: [0xac,0x3e,0xed,0x70] + +vsra.d $vr8, $vr8, $vr11 +# CHECK-INST: vsra.d $vr8, $vr8, $vr11 +# CHECK-ENCODING: encoding: [0x08,0xad,0xed,0x70] + +vsrai.b $vr9, $vr0, 4 +# CHECK-INST: vsrai.b $vr9, $vr0, 4 +# CHECK-ENCODING: encoding: [0x09,0x30,0x34,0x73] + +vsrai.h $vr1, $vr8, 6 +# CHECK-INST: vsrai.h $vr1, $vr8, 6 +# CHECK-ENCODING: encoding: [0x01,0x59,0x34,0x73] + +vsrai.w $vr20, $vr30, 14 +# CHECK-INST: vsrai.w $vr20, $vr30, 14 +# CHECK-ENCODING: encoding: [0xd4,0xbb,0x34,0x73] + +vsrai.d $vr0, $vr21, 12 +# CHECK-INST: vsrai.d $vr0, $vr21, 12 +# CHECK-ENCODING: encoding: [0xa0,0x32,0x35,0x73] diff --git a/llvm/test/MC/LoongArch/lsx/sran.s b/llvm/test/MC/LoongArch/lsx/sran.s new file mode 100644 index 0000000000000000000000000000000000000000..595a5be90dacc3b8414edf5ee33566d9cfd839d2 --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/sran.s @@ -0,0 +1,16 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vsran.b.h $vr25, $vr2, $vr31 +# CHECK-INST: vsran.b.h $vr25, $vr2, $vr31 
+# CHECK-ENCODING: encoding: [0x59,0xfc,0xf6,0x70] + +vsran.h.w $vr31, $vr10, $vr3 +# CHECK-INST: vsran.h.w $vr31, $vr10, $vr3 +# CHECK-ENCODING: encoding: [0x5f,0x0d,0xf7,0x70] + +vsran.w.d $vr8, $vr3, $vr12 +# CHECK-INST: vsran.w.d $vr8, $vr3, $vr12 +# CHECK-ENCODING: encoding: [0x68,0xb0,0xf7,0x70] diff --git a/llvm/test/MC/LoongArch/lsx/srani.s b/llvm/test/MC/LoongArch/lsx/srani.s new file mode 100644 index 0000000000000000000000000000000000000000..f28d7280735952c5373ecdd8b4ecde033d6a2dfa --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/srani.s @@ -0,0 +1,20 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vsrani.b.h $vr3, $vr0, 9 +# CHECK-INST: vsrani.b.h $vr3, $vr0, 9 +# CHECK-ENCODING: encoding: [0x03,0x64,0x58,0x73] + +vsrani.h.w $vr4, $vr3, 26 +# CHECK-INST: vsrani.h.w $vr4, $vr3, 26 +# CHECK-ENCODING: encoding: [0x64,0xe8,0x58,0x73] + +vsrani.w.d $vr8, $vr27, 52 +# CHECK-INST: vsrani.w.d $vr8, $vr27, 52 +# CHECK-ENCODING: encoding: [0x68,0xd3,0x59,0x73] + +vsrani.d.q $vr21, $vr24, 28 +# CHECK-INST: vsrani.d.q $vr21, $vr24, 28 +# CHECK-ENCODING: encoding: [0x15,0x73,0x5a,0x73] diff --git a/llvm/test/MC/LoongArch/lsx/srar.s b/llvm/test/MC/LoongArch/lsx/srar.s new file mode 100644 index 0000000000000000000000000000000000000000..b62bda2030eabf93eef53717ff0a888ebc763fdb --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/srar.s @@ -0,0 +1,36 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vsrar.b $vr5, $vr31, $vr9 +# CHECK-INST: vsrar.b $vr5, $vr31, $vr9 +# CHECK-ENCODING: encoding: [0xe5,0x27,0xf2,0x70] + +vsrar.h $vr30, $vr23, $vr30 +# CHECK-INST: vsrar.h $vr30, $vr23, $vr30 +# CHECK-ENCODING: encoding: [0xfe,0xfa,0xf2,0x70] + +vsrar.w $vr22, $vr8, $vr1 +# CHECK-INST: vsrar.w $vr22, $vr8, $vr1 +# CHECK-ENCODING: encoding: [0x16,0x05,0xf3,0x70] + +vsrar.d $vr17, $vr1, $vr5 +# CHECK-INST: vsrar.d $vr17, $vr1, $vr5 +# CHECK-ENCODING: encoding: [0x31,0x94,0xf3,0x70] + +vsrari.b $vr11, $vr24, 5 +# CHECK-INST: vsrari.b $vr11, $vr24, 5 +# CHECK-ENCODING: encoding: [0x0b,0x37,0xa8,0x72] + +vsrari.h $vr24, $vr0, 7 +# CHECK-INST: vsrari.h $vr24, $vr0, 7 +# CHECK-ENCODING: encoding: [0x18,0x5c,0xa8,0x72] + +vsrari.w $vr16, $vr0, 0 +# CHECK-INST: vsrari.w $vr16, $vr0, 0 +# CHECK-ENCODING: encoding: [0x10,0x80,0xa8,0x72] + +vsrari.d $vr16, $vr13, 63 +# CHECK-INST: vsrari.d $vr16, $vr13, 63 +# CHECK-ENCODING: encoding: [0xb0,0xfd,0xa9,0x72] diff --git a/llvm/test/MC/LoongArch/lsx/srarn.s b/llvm/test/MC/LoongArch/lsx/srarn.s new file mode 100644 index 0000000000000000000000000000000000000000..665f722d67937f34077468d6a05599d3d39528b9 --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/srarn.s @@ -0,0 +1,16 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vsrarn.b.h $vr19, $vr23, $vr21 +# CHECK-INST: vsrarn.b.h $vr19, $vr23, $vr21 +# CHECK-ENCODING: encoding: [0xf3,0xd6,0xfa,0x70] + +vsrarn.h.w $vr18, $vr6, $vr7 +# CHECK-INST: vsrarn.h.w $vr18, $vr6, $vr7 +# CHECK-ENCODING: encoding: 
[0xd2,0x1c,0xfb,0x70] + +vsrarn.w.d $vr2, $vr11, $vr5 +# CHECK-INST: vsrarn.w.d $vr2, $vr11, $vr5 +# CHECK-ENCODING: encoding: [0x62,0x95,0xfb,0x70] diff --git a/llvm/test/MC/LoongArch/lsx/srarni.s b/llvm/test/MC/LoongArch/lsx/srarni.s new file mode 100644 index 0000000000000000000000000000000000000000..356d071a6e919cc17bd5d55d0466bfda87f7c65a --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/srarni.s @@ -0,0 +1,20 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vsrarni.b.h $vr29, $vr20, 5 +# CHECK-INST: vsrarni.b.h $vr29, $vr20, 5 +# CHECK-ENCODING: encoding: [0x9d,0x56,0x5c,0x73] + +vsrarni.h.w $vr3, $vr29, 14 +# CHECK-INST: vsrarni.h.w $vr3, $vr29, 14 +# CHECK-ENCODING: encoding: [0xa3,0xbb,0x5c,0x73] + +vsrarni.w.d $vr14, $vr19, 10 +# CHECK-INST: vsrarni.w.d $vr14, $vr19, 10 +# CHECK-ENCODING: encoding: [0x6e,0x2a,0x5d,0x73] + +vsrarni.d.q $vr22, $vr27, 38 +# CHECK-INST: vsrarni.d.q $vr22, $vr27, 38 +# CHECK-ENCODING: encoding: [0x76,0x9b,0x5e,0x73] diff --git a/llvm/test/MC/LoongArch/lsx/srl.s b/llvm/test/MC/LoongArch/lsx/srl.s new file mode 100644 index 0000000000000000000000000000000000000000..d6d806bfb12a4afa12a7f5025be3b136530c2332 --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/srl.s @@ -0,0 +1,36 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vsrl.b $vr20, $vr7, $vr27 +# CHECK-INST: vsrl.b $vr20, $vr7, $vr27 +# CHECK-ENCODING: encoding: [0xf4,0x6c,0xea,0x70] + +vsrl.h $vr31, $vr5, $vr31 +# CHECK-INST: vsrl.h $vr31, $vr5, $vr31 +# CHECK-ENCODING: encoding: [0xbf,0xfc,0xea,0x70] + +vsrl.w $vr31, $vr0, $vr6 +# CHECK-INST: vsrl.w $vr31, $vr0, $vr6 +# CHECK-ENCODING: encoding: [0x1f,0x18,0xeb,0x70] + +vsrl.d $vr6, $vr8, $vr7 +# CHECK-INST: vsrl.d $vr6, $vr8, $vr7 +# CHECK-ENCODING: encoding: [0x06,0x9d,0xeb,0x70] + +vsrli.b $vr17, $vr8, 6 +# CHECK-INST: vsrli.b $vr17, $vr8, 6 +# CHECK-ENCODING: encoding: [0x11,0x39,0x30,0x73] + +vsrli.h $vr3, $vr31, 2 +# CHECK-INST: vsrli.h $vr3, $vr31, 2 +# CHECK-ENCODING: encoding: [0xe3,0x4b,0x30,0x73] + +vsrli.w $vr17, $vr5, 0 +# CHECK-INST: vsrli.w $vr17, $vr5, 0 +# CHECK-ENCODING: encoding: [0xb1,0x80,0x30,0x73] + +vsrli.d $vr16, $vr22, 34 +# CHECK-INST: vsrli.d $vr16, $vr22, 34 +# CHECK-ENCODING: encoding: [0xd0,0x8a,0x31,0x73] diff --git a/llvm/test/MC/LoongArch/lsx/srln.s b/llvm/test/MC/LoongArch/lsx/srln.s new file mode 100644 index 0000000000000000000000000000000000000000..55b1f1b921a181615823c50cfc99ba0e42fe4a8d --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/srln.s @@ -0,0 +1,16 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vsrln.b.h $vr29, $vr28, $vr27 +# CHECK-INST: vsrln.b.h $vr29, $vr28, $vr27 +# CHECK-ENCODING: encoding: [0x9d,0xef,0xf4,0x70] + +vsrln.h.w $vr18, $vr17, $vr0 +# CHECK-INST: vsrln.h.w $vr18, $vr17, $vr0 +# CHECK-ENCODING: encoding: [0x32,0x02,0xf5,0x70] + +vsrln.w.d $vr16, $vr5, $vr19 +# CHECK-INST: vsrln.w.d $vr16, $vr5, $vr19 +# CHECK-ENCODING: encoding: [0xb0,0xcc,0xf5,0x70] diff --git 
a/llvm/test/MC/LoongArch/lsx/srlni.s b/llvm/test/MC/LoongArch/lsx/srlni.s new file mode 100644 index 0000000000000000000000000000000000000000..97c7831e8568e3e156f1cf722b8f27f38ee16389 --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/srlni.s @@ -0,0 +1,20 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vsrlni.b.h $vr15, $vr25, 9 +# CHECK-INST: vsrlni.b.h $vr15, $vr25, 9 +# CHECK-ENCODING: encoding: [0x2f,0x67,0x40,0x73] + +vsrlni.h.w $vr3, $vr0, 8 +# CHECK-INST: vsrlni.h.w $vr3, $vr0, 8 +# CHECK-ENCODING: encoding: [0x03,0xa0,0x40,0x73] + +vsrlni.w.d $vr19, $vr26, 51 +# CHECK-INST: vsrlni.w.d $vr19, $vr26, 51 +# CHECK-ENCODING: encoding: [0x53,0xcf,0x41,0x73] + +vsrlni.d.q $vr10, $vr18, 60 +# CHECK-INST: vsrlni.d.q $vr10, $vr18, 60 +# CHECK-ENCODING: encoding: [0x4a,0xf2,0x42,0x73] diff --git a/llvm/test/MC/LoongArch/lsx/srlr.s b/llvm/test/MC/LoongArch/lsx/srlr.s new file mode 100644 index 0000000000000000000000000000000000000000..b78c178b5db8e7edd8d8c37a7b65e46272a11d17 --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/srlr.s @@ -0,0 +1,36 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vsrlr.b $vr27, $vr1, $vr6 +# CHECK-INST: vsrlr.b $vr27, $vr1, $vr6 +# CHECK-ENCODING: encoding: [0x3b,0x18,0xf0,0x70] + +vsrlr.h $vr31, $vr18, $vr2 +# CHECK-INST: vsrlr.h $vr31, $vr18, $vr2 +# CHECK-ENCODING: encoding: [0x5f,0x8a,0xf0,0x70] + +vsrlr.w $vr21, $vr29, $vr30 +# CHECK-INST: vsrlr.w $vr21, $vr29, $vr30 +# CHECK-ENCODING: encoding: [0xb5,0x7b,0xf1,0x70] + +vsrlr.d $vr4, $vr3, $vr30 +# CHECK-INST: vsrlr.d $vr4, $vr3, $vr30 +# CHECK-ENCODING: encoding: [0x64,0xf8,0xf1,0x70] + +vsrlri.b $vr20, $vr24, 6 +# CHECK-INST: vsrlri.b $vr20, $vr24, 6 +# CHECK-ENCODING: encoding: [0x14,0x3b,0xa4,0x72] + +vsrlri.h $vr23, $vr22, 4 +# CHECK-INST: vsrlri.h $vr23, $vr22, 4 +# CHECK-ENCODING: encoding: [0xd7,0x52,0xa4,0x72] + +vsrlri.w $vr19, $vr8, 1 +# CHECK-INST: vsrlri.w $vr19, $vr8, 1 +# CHECK-ENCODING: encoding: [0x13,0x85,0xa4,0x72] + +vsrlri.d $vr18, $vr30, 51 +# CHECK-INST: vsrlri.d $vr18, $vr30, 51 +# CHECK-ENCODING: encoding: [0xd2,0xcf,0xa5,0x72] diff --git a/llvm/test/MC/LoongArch/lsx/srlrn.s b/llvm/test/MC/LoongArch/lsx/srlrn.s new file mode 100644 index 0000000000000000000000000000000000000000..a00cc80df7cfc042395b10115fff9e6ec224caad --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/srlrn.s @@ -0,0 +1,16 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vsrlrn.b.h $vr10, $vr18, $vr11 +# CHECK-INST: vsrlrn.b.h $vr10, $vr18, $vr11 +# CHECK-ENCODING: encoding: [0x4a,0xae,0xf8,0x70] + +vsrlrn.h.w $vr28, $vr15, $vr22 +# CHECK-INST: vsrlrn.h.w $vr28, $vr15, $vr22 +# CHECK-ENCODING: encoding: [0xfc,0x59,0xf9,0x70] + +vsrlrn.w.d $vr19, $vr7, $vr26 +# CHECK-INST: vsrlrn.w.d $vr19, $vr7, $vr26 +# CHECK-ENCODING: encoding: [0xf3,0xe8,0xf9,0x70] diff --git a/llvm/test/MC/LoongArch/lsx/srlrni.s b/llvm/test/MC/LoongArch/lsx/srlrni.s new file mode 100644 index 
0000000000000000000000000000000000000000..361914b44574cc52a0109773062facb80dd6ed8a --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/srlrni.s @@ -0,0 +1,20 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vsrlrni.b.h $vr15, $vr5, 3 +# CHECK-INST: vsrlrni.b.h $vr15, $vr5, 3 +# CHECK-ENCODING: encoding: [0xaf,0x4c,0x44,0x73] + +vsrlrni.h.w $vr28, $vr27, 1 +# CHECK-INST: vsrlrni.h.w $vr28, $vr27, 1 +# CHECK-ENCODING: encoding: [0x7c,0x87,0x44,0x73] + +vsrlrni.w.d $vr3, $vr25, 56 +# CHECK-INST: vsrlrni.w.d $vr3, $vr25, 56 +# CHECK-ENCODING: encoding: [0x23,0xe3,0x45,0x73] + +vsrlrni.d.q $vr4, $vr16, 13 +# CHECK-INST: vsrlrni.d.q $vr4, $vr16, 13 +# CHECK-ENCODING: encoding: [0x04,0x36,0x46,0x73] diff --git a/llvm/test/MC/LoongArch/lsx/ssran.s b/llvm/test/MC/LoongArch/lsx/ssran.s new file mode 100644 index 0000000000000000000000000000000000000000..fefbd29a2b64d9d702bfe8c5cfc15a25907b0826 --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/ssran.s @@ -0,0 +1,28 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vssran.b.h $vr26, $vr26, $vr18 +# CHECK-INST: vssran.b.h $vr26, $vr26, $vr18 +# CHECK-ENCODING: encoding: [0x5a,0xcb,0xfe,0x70] + +vssran.h.w $vr21, $vr14, $vr11 +# CHECK-INST: vssran.h.w $vr21, $vr14, $vr11 +# CHECK-ENCODING: encoding: [0xd5,0x2d,0xff,0x70] + +vssran.w.d $vr4, $vr21, $vr11 +# CHECK-INST: vssran.w.d $vr4, $vr21, $vr11 +# CHECK-ENCODING: encoding: [0xa4,0xae,0xff,0x70] + +vssran.bu.h $vr10, $vr30, $vr19 +# CHECK-INST: vssran.bu.h $vr10, $vr30, $vr19 +# CHECK-ENCODING: encoding: [0xca,0xcf,0x06,0x71] + +vssran.hu.w $vr7, $vr8, $vr20 +# CHECK-INST: vssran.hu.w $vr7, $vr8, $vr20 +# CHECK-ENCODING: encoding: [0x07,0x51,0x07,0x71] + +vssran.wu.d $vr10, $vr21, $vr0 +# CHECK-INST: vssran.wu.d $vr10, $vr21, $vr0 +# CHECK-ENCODING: encoding: [0xaa,0x82,0x07,0x71] diff --git a/llvm/test/MC/LoongArch/lsx/ssrani.s b/llvm/test/MC/LoongArch/lsx/ssrani.s new file mode 100644 index 0000000000000000000000000000000000000000..20ac1cbad35c1531850123a9a0407b614192d4dc --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/ssrani.s @@ -0,0 +1,36 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vssrani.b.h $vr3, $vr12, 10 +# CHECK-INST: vssrani.b.h $vr3, $vr12, 10 +# CHECK-ENCODING: encoding: [0x83,0x69,0x60,0x73] + +vssrani.h.w $vr3, $vr25, 0 +# CHECK-INST: vssrani.h.w $vr3, $vr25, 0 +# CHECK-ENCODING: encoding: [0x23,0x83,0x60,0x73] + +vssrani.w.d $vr12, $vr19, 43 +# CHECK-INST: vssrani.w.d $vr12, $vr19, 43 +# CHECK-ENCODING: encoding: [0x6c,0xae,0x61,0x73] + +vssrani.d.q $vr25, $vr8, 13 +# CHECK-INST: vssrani.d.q $vr25, $vr8, 13 +# CHECK-ENCODING: encoding: [0x19,0x35,0x62,0x73] + +vssrani.bu.h $vr26, $vr16, 12 +# CHECK-INST: vssrani.bu.h $vr26, $vr16, 12 +# CHECK-ENCODING: encoding: [0x1a,0x72,0x64,0x73] + +vssrani.hu.w $vr31, $vr6, 28 +# CHECK-INST: vssrani.hu.w $vr31, $vr6, 28 +# CHECK-ENCODING: encoding: [0xdf,0xf0,0x64,0x73] + +vssrani.wu.d $vr29, $vr25, 2 +# CHECK-INST: vssrani.wu.d $vr29, $vr25, 
2 +# CHECK-ENCODING: encoding: [0x3d,0x0b,0x65,0x73] + +vssrani.du.q $vr22, $vr27, 71 +# CHECK-INST: vssrani.du.q $vr22, $vr27, 71 +# CHECK-ENCODING: encoding: [0x76,0x1f,0x67,0x73] diff --git a/llvm/test/MC/LoongArch/lsx/ssrarn.s b/llvm/test/MC/LoongArch/lsx/ssrarn.s new file mode 100644 index 0000000000000000000000000000000000000000..8157423b255c73def66c28a9bbc5a54e46ec3321 --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/ssrarn.s @@ -0,0 +1,28 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vssrarn.b.h $vr27, $vr29, $vr23 +# CHECK-INST: vssrarn.b.h $vr27, $vr29, $vr23 +# CHECK-ENCODING: encoding: [0xbb,0xdf,0x02,0x71] + +vssrarn.h.w $vr13, $vr17, $vr0 +# CHECK-INST: vssrarn.h.w $vr13, $vr17, $vr0 +# CHECK-ENCODING: encoding: [0x2d,0x02,0x03,0x71] + +vssrarn.w.d $vr5, $vr11, $vr16 +# CHECK-INST: vssrarn.w.d $vr5, $vr11, $vr16 +# CHECK-ENCODING: encoding: [0x65,0xc1,0x03,0x71] + +vssrarn.bu.h $vr18, $vr10, $vr13 +# CHECK-INST: vssrarn.bu.h $vr18, $vr10, $vr13 +# CHECK-ENCODING: encoding: [0x52,0xb5,0x0a,0x71] + +vssrarn.hu.w $vr5, $vr25, $vr16 +# CHECK-INST: vssrarn.hu.w $vr5, $vr25, $vr16 +# CHECK-ENCODING: encoding: [0x25,0x43,0x0b,0x71] + +vssrarn.wu.d $vr6, $vr23, $vr30 +# CHECK-INST: vssrarn.wu.d $vr6, $vr23, $vr30 +# CHECK-ENCODING: encoding: [0xe6,0xfa,0x0b,0x71] diff --git a/llvm/test/MC/LoongArch/lsx/ssrarni.s b/llvm/test/MC/LoongArch/lsx/ssrarni.s new file mode 100644 index 0000000000000000000000000000000000000000..9a33f67110a3a370e008dc26daae3748934e1ba9 --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/ssrarni.s @@ -0,0 +1,36 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vssrarni.b.h $vr3, $vr9, 2 +# CHECK-INST: vssrarni.b.h $vr3, $vr9, 2 +# CHECK-ENCODING: encoding: [0x23,0x49,0x68,0x73] + +vssrarni.h.w $vr21, $vr17, 8 +# CHECK-INST: vssrarni.h.w $vr21, $vr17, 8 +# CHECK-ENCODING: encoding: [0x35,0xa2,0x68,0x73] + +vssrarni.w.d $vr7, $vr6, 5 +# CHECK-INST: vssrarni.w.d $vr7, $vr6, 5 +# CHECK-ENCODING: encoding: [0xc7,0x14,0x69,0x73] + +vssrarni.d.q $vr4, $vr22, 90 +# CHECK-INST: vssrarni.d.q $vr4, $vr22, 90 +# CHECK-ENCODING: encoding: [0xc4,0x6a,0x6b,0x73] + +vssrarni.bu.h $vr25, $vr0, 9 +# CHECK-INST: vssrarni.bu.h $vr25, $vr0, 9 +# CHECK-ENCODING: encoding: [0x19,0x64,0x6c,0x73] + +vssrarni.hu.w $vr5, $vr2, 24 +# CHECK-INST: vssrarni.hu.w $vr5, $vr2, 24 +# CHECK-ENCODING: encoding: [0x45,0xe0,0x6c,0x73] + +vssrarni.wu.d $vr23, $vr29, 25 +# CHECK-INST: vssrarni.wu.d $vr23, $vr29, 25 +# CHECK-ENCODING: encoding: [0xb7,0x67,0x6d,0x73] + +vssrarni.du.q $vr2, $vr12, 106 +# CHECK-INST: vssrarni.du.q $vr2, $vr12, 106 +# CHECK-ENCODING: encoding: [0x82,0xa9,0x6f,0x73] diff --git a/llvm/test/MC/LoongArch/lsx/ssrln.s b/llvm/test/MC/LoongArch/lsx/ssrln.s new file mode 100644 index 0000000000000000000000000000000000000000..1b00e3b543224370515abeb94d0e69b8aa3fe5cc --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/ssrln.s @@ -0,0 +1,28 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST 
+ +vssrln.b.h $vr20, $vr5, $vr20 +# CHECK-INST: vssrln.b.h $vr20, $vr5, $vr20 +# CHECK-ENCODING: encoding: [0xb4,0xd0,0xfc,0x70] + +vssrln.h.w $vr0, $vr21, $vr2 +# CHECK-INST: vssrln.h.w $vr0, $vr21, $vr2 +# CHECK-ENCODING: encoding: [0xa0,0x0a,0xfd,0x70] + +vssrln.w.d $vr16, $vr6, $vr3 +# CHECK-INST: vssrln.w.d $vr16, $vr6, $vr3 +# CHECK-ENCODING: encoding: [0xd0,0x8c,0xfd,0x70] + +vssrln.bu.h $vr6, $vr30, $vr9 +# CHECK-INST: vssrln.bu.h $vr6, $vr30, $vr9 +# CHECK-ENCODING: encoding: [0xc6,0xa7,0x04,0x71] + +vssrln.hu.w $vr2, $vr8, $vr3 +# CHECK-INST: vssrln.hu.w $vr2, $vr8, $vr3 +# CHECK-ENCODING: encoding: [0x02,0x0d,0x05,0x71] + +vssrln.wu.d $vr28, $vr28, $vr5 +# CHECK-INST: vssrln.wu.d $vr28, $vr28, $vr5 +# CHECK-ENCODING: encoding: [0x9c,0x97,0x05,0x71] diff --git a/llvm/test/MC/LoongArch/lsx/ssrlni.s b/llvm/test/MC/LoongArch/lsx/ssrlni.s new file mode 100644 index 0000000000000000000000000000000000000000..eb630b3d61a07c976a73b1b6625b7c7f9d38f7fa --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/ssrlni.s @@ -0,0 +1,36 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vssrlni.b.h $vr2, $vr23, 5 +# CHECK-INST: vssrlni.b.h $vr2, $vr23, 5 +# CHECK-ENCODING: encoding: [0xe2,0x56,0x48,0x73] + +vssrlni.h.w $vr15, $vr20, 12 +# CHECK-INST: vssrlni.h.w $vr15, $vr20, 12 +# CHECK-ENCODING: encoding: [0x8f,0xb2,0x48,0x73] + +vssrlni.w.d $vr27, $vr9, 7 +# CHECK-INST: vssrlni.w.d $vr27, $vr9, 7 +# CHECK-ENCODING: encoding: [0x3b,0x1d,0x49,0x73] + +vssrlni.d.q $vr10, $vr2, 4 +# CHECK-INST: vssrlni.d.q $vr10, $vr2, 4 +# CHECK-ENCODING: encoding: [0x4a,0x10,0x4a,0x73] + +vssrlni.bu.h $vr19, $vr3, 2 +# CHECK-INST: vssrlni.bu.h $vr19, $vr3, 2 +# CHECK-ENCODING: encoding: [0x73,0x48,0x4c,0x73] + +vssrlni.hu.w $vr31, $vr19, 1 +# CHECK-INST: vssrlni.hu.w $vr31, $vr19, 1 +# CHECK-ENCODING: encoding: [0x7f,0x86,0x4c,0x73] + +vssrlni.wu.d $vr13, $vr27, 6 +# CHECK-INST: vssrlni.wu.d $vr13, $vr27, 6 +# CHECK-ENCODING: encoding: [0x6d,0x1b,0x4d,0x73] + +vssrlni.du.q $vr11, $vr30, 32 +# CHECK-INST: vssrlni.du.q $vr11, $vr30, 32 +# CHECK-ENCODING: encoding: [0xcb,0x83,0x4e,0x73] diff --git a/llvm/test/MC/LoongArch/lsx/ssrlrn.s b/llvm/test/MC/LoongArch/lsx/ssrlrn.s new file mode 100644 index 0000000000000000000000000000000000000000..e0eeb3ac2afc068f9f9e362d9efa2b553fd7d641 --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/ssrlrn.s @@ -0,0 +1,28 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vssrlrn.b.h $vr28, $vr3, $vr15 +# CHECK-INST: vssrlrn.b.h $vr28, $vr3, $vr15 +# CHECK-ENCODING: encoding: [0x7c,0xbc,0x00,0x71] + +vssrlrn.h.w $vr22, $vr0, $vr9 +# CHECK-INST: vssrlrn.h.w $vr22, $vr0, $vr9 +# CHECK-ENCODING: encoding: [0x16,0x24,0x01,0x71] + +vssrlrn.w.d $vr6, $vr14, $vr21 +# CHECK-INST: vssrlrn.w.d $vr6, $vr14, $vr21 +# CHECK-ENCODING: encoding: [0xc6,0xd5,0x01,0x71] + +vssrlrn.bu.h $vr10, $vr24, $vr12 +# CHECK-INST: vssrlrn.bu.h $vr10, $vr24, $vr12 +# CHECK-ENCODING: encoding: [0x0a,0xb3,0x08,0x71] + +vssrlrn.hu.w $vr29, $vr6, $vr1 +# CHECK-INST: vssrlrn.hu.w $vr29, $vr6, $vr1 +# CHECK-ENCODING: encoding: [0xdd,0x04,0x09,0x71] + +vssrlrn.wu.d $vr2, $vr23, $vr7 +# CHECK-INST: vssrlrn.wu.d $vr2, $vr23, $vr7 +# 
CHECK-ENCODING: encoding: [0xe2,0x9e,0x09,0x71] diff --git a/llvm/test/MC/LoongArch/lsx/ssrlrni.s b/llvm/test/MC/LoongArch/lsx/ssrlrni.s new file mode 100644 index 0000000000000000000000000000000000000000..3c5c25fdab54051f5134d02d4fed3a6cd2d53934 --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/ssrlrni.s @@ -0,0 +1,36 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vssrlrni.b.h $vr18, $vr21, 6 +# CHECK-INST: vssrlrni.b.h $vr18, $vr21, 6 +# CHECK-ENCODING: encoding: [0xb2,0x5a,0x50,0x73] + +vssrlrni.h.w $vr7, $vr12, 9 +# CHECK-INST: vssrlrni.h.w $vr7, $vr12, 9 +# CHECK-ENCODING: encoding: [0x87,0xa5,0x50,0x73] + +vssrlrni.w.d $vr10, $vr14, 63 +# CHECK-INST: vssrlrni.w.d $vr10, $vr14, 63 +# CHECK-ENCODING: encoding: [0xca,0xfd,0x51,0x73] + +vssrlrni.d.q $vr12, $vr26, 68 +# CHECK-INST: vssrlrni.d.q $vr12, $vr26, 68 +# CHECK-ENCODING: encoding: [0x4c,0x13,0x53,0x73] + +vssrlrni.bu.h $vr22, $vr24, 1 +# CHECK-INST: vssrlrni.bu.h $vr22, $vr24, 1 +# CHECK-ENCODING: encoding: [0x16,0x47,0x54,0x73] + +vssrlrni.hu.w $vr27, $vr17, 7 +# CHECK-INST: vssrlrni.hu.w $vr27, $vr17, 7 +# CHECK-ENCODING: encoding: [0x3b,0x9e,0x54,0x73] + +vssrlrni.wu.d $vr3, $vr15, 56 +# CHECK-INST: vssrlrni.wu.d $vr3, $vr15, 56 +# CHECK-ENCODING: encoding: [0xe3,0xe1,0x55,0x73] + +vssrlrni.du.q $vr12, $vr10, 4 +# CHECK-INST: vssrlrni.du.q $vr12, $vr10, 4 +# CHECK-ENCODING: encoding: [0x4c,0x11,0x56,0x73] diff --git a/llvm/test/MC/LoongArch/lsx/ssub.s b/llvm/test/MC/LoongArch/lsx/ssub.s new file mode 100644 index 0000000000000000000000000000000000000000..603d26a05ccf38cfaa80ae4ff117f40bb927b644 --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/ssub.s @@ -0,0 +1,36 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vssub.b $vr10, $vr10, $vr11 +# CHECK-INST: vssub.b $vr10, $vr10, $vr11 +# CHECK-ENCODING: encoding: [0x4a,0x2d,0x48,0x70] + +vssub.h $vr2, $vr18, $vr5 +# CHECK-INST: vssub.h $vr2, $vr18, $vr5 +# CHECK-ENCODING: encoding: [0x42,0x96,0x48,0x70] + +vssub.w $vr28, $vr10, $vr2 +# CHECK-INST: vssub.w $vr28, $vr10, $vr2 +# CHECK-ENCODING: encoding: [0x5c,0x09,0x49,0x70] + +vssub.d $vr25, $vr3, $vr10 +# CHECK-INST: vssub.d $vr25, $vr3, $vr10 +# CHECK-ENCODING: encoding: [0x79,0xa8,0x49,0x70] + +vssub.bu $vr31, $vr13, $vr11 +# CHECK-INST: vssub.bu $vr31, $vr13, $vr11 +# CHECK-ENCODING: encoding: [0xbf,0x2d,0x4c,0x70] + +vssub.hu $vr15, $vr19, $vr9 +# CHECK-INST: vssub.hu $vr15, $vr19, $vr9 +# CHECK-ENCODING: encoding: [0x6f,0xa6,0x4c,0x70] + +vssub.wu $vr15, $vr12, $vr14 +# CHECK-INST: vssub.wu $vr15, $vr12, $vr14 +# CHECK-ENCODING: encoding: [0x8f,0x39,0x4d,0x70] + +vssub.du $vr29, $vr4, $vr11 +# CHECK-INST: vssub.du $vr29, $vr4, $vr11 +# CHECK-ENCODING: encoding: [0x9d,0xac,0x4d,0x70] diff --git a/llvm/test/MC/LoongArch/lsx/st.s b/llvm/test/MC/LoongArch/lsx/st.s new file mode 100644 index 0000000000000000000000000000000000000000..e4e05aa2f3bbec7aadce890d6634133e352432da --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/st.s @@ -0,0 +1,12 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# 
RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vst $vr9, $r17, 1700 +# CHECK-INST: vst $vr9, $t5, 1700 +# CHECK-ENCODING: encoding: [0x29,0x92,0x5a,0x2c] + +vstx $vr23, $r17, $r31 +# CHECK-INST: vstx $vr23, $t5, $s8 +# CHECK-ENCODING: encoding: [0x37,0x7e,0x44,0x38] diff --git a/llvm/test/MC/LoongArch/lsx/stelm.s b/llvm/test/MC/LoongArch/lsx/stelm.s new file mode 100644 index 0000000000000000000000000000000000000000..8935d99336f497ec1c7b37ff8eff887e408daade --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/stelm.s @@ -0,0 +1,20 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vstelm.b $vr22, $r31, -90, 12 +# CHECK-INST: vstelm.b $vr22, $s8, -90, 12 +# CHECK-ENCODING: encoding: [0xf6,0x9b,0xb2,0x31] + +vstelm.h $vr28, $r2, 48, 7 +# CHECK-INST: vstelm.h $vr28, $tp, 48, 7 +# CHECK-ENCODING: encoding: [0x5c,0x60,0x5c,0x31] + +vstelm.w $vr18, $r12, -40, 2 +# CHECK-INST: vstelm.w $vr18, $t0, -40, 2 +# CHECK-ENCODING: encoding: [0x92,0xd9,0x2b,0x31] + +vstelm.d $vr4, $r23, -248, 1 +# CHECK-INST: vstelm.d $vr4, $s0, -248, 1 +# CHECK-ENCODING: encoding: [0xe4,0x86,0x17,0x31] diff --git a/llvm/test/MC/LoongArch/lsx/sub.s b/llvm/test/MC/LoongArch/lsx/sub.s new file mode 100644 index 0000000000000000000000000000000000000000..8f3c55a28a5808b8682242997c0f1a11dab84370 --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/sub.s @@ -0,0 +1,24 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vsub.b $vr7, $vr21, $vr25 +# CHECK-INST: vsub.b $vr7, $vr21, $vr25 +# CHECK-ENCODING: encoding: [0xa7,0x66,0x0c,0x70] + +vsub.h $vr23, $vr7, $vr4 +# CHECK-INST: vsub.h $vr23, $vr7, $vr4 +# CHECK-ENCODING: encoding: [0xf7,0x90,0x0c,0x70] + +vsub.w $vr28, $vr27, $vr25 +# CHECK-INST: vsub.w $vr28, $vr27, $vr25 +# CHECK-ENCODING: encoding: [0x7c,0x67,0x0d,0x70] + +vsub.d $vr27, $vr11, $vr20 +# CHECK-INST: vsub.d $vr27, $vr11, $vr20 +# CHECK-ENCODING: encoding: [0x7b,0xd1,0x0d,0x70] + +vsub.q $vr8, $vr11, $vr15 +# CHECK-INST: vsub.q $vr8, $vr11, $vr15 +# CHECK-ENCODING: encoding: [0x68,0xbd,0x2d,0x71] diff --git a/llvm/test/MC/LoongArch/lsx/subi.s b/llvm/test/MC/LoongArch/lsx/subi.s new file mode 100644 index 0000000000000000000000000000000000000000..d26f306b713e834c4adadc5685f33f911a5520ec --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/subi.s @@ -0,0 +1,20 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vsubi.bu $vr21, $vr1, 16 +# CHECK-INST: vsubi.bu $vr21, $vr1, 16 +# CHECK-ENCODING: encoding: [0x35,0x40,0x8c,0x72] + +vsubi.hu $vr10, $vr24, 8 +# CHECK-INST: vsubi.hu $vr10, $vr24, 8 +# CHECK-ENCODING: encoding: [0x0a,0xa3,0x8c,0x72] + +vsubi.wu $vr10, $vr13, 8 +# CHECK-INST: vsubi.wu $vr10, $vr13, 8 +# CHECK-ENCODING: encoding: [0xaa,0x21,0x8d,0x72] + +vsubi.du $vr27, $vr0, 29 +# CHECK-INST: vsubi.du $vr27, $vr0, 29 +# CHECK-ENCODING: encoding: [0x1b,0xf4,0x8d,0x72] diff --git a/llvm/test/MC/LoongArch/lsx/subw.s b/llvm/test/MC/LoongArch/lsx/subw.s new file mode 100644 index 
0000000000000000000000000000000000000000..49fc2a02e1a5faa77b18f0e91219e31b4d588a5f --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/subw.s @@ -0,0 +1,68 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vsubwev.h.b $vr21, $vr25, $vr20 +# CHECK-INST: vsubwev.h.b $vr21, $vr25, $vr20 +# CHECK-ENCODING: encoding: [0x35,0x53,0x20,0x70] + +vsubwev.w.h $vr11, $vr8, $vr10 +# CHECK-INST: vsubwev.w.h $vr11, $vr8, $vr10 +# CHECK-ENCODING: encoding: [0x0b,0xa9,0x20,0x70] + +vsubwev.d.w $vr30, $vr6, $vr24 +# CHECK-INST: vsubwev.d.w $vr30, $vr6, $vr24 +# CHECK-ENCODING: encoding: [0xde,0x60,0x21,0x70] + +vsubwev.q.d $vr4, $vr14, $vr23 +# CHECK-INST: vsubwev.q.d $vr4, $vr14, $vr23 +# CHECK-ENCODING: encoding: [0xc4,0xdd,0x21,0x70] + +vsubwev.h.bu $vr25, $vr20, $vr2 +# CHECK-INST: vsubwev.h.bu $vr25, $vr20, $vr2 +# CHECK-ENCODING: encoding: [0x99,0x0a,0x30,0x70] + +vsubwev.w.hu $vr1, $vr9, $vr28 +# CHECK-INST: vsubwev.w.hu $vr1, $vr9, $vr28 +# CHECK-ENCODING: encoding: [0x21,0xf1,0x30,0x70] + +vsubwev.d.wu $vr23, $vr13, $vr2 +# CHECK-INST: vsubwev.d.wu $vr23, $vr13, $vr2 +# CHECK-ENCODING: encoding: [0xb7,0x09,0x31,0x70] + +vsubwev.q.du $vr9, $vr28, $vr12 +# CHECK-INST: vsubwev.q.du $vr9, $vr28, $vr12 +# CHECK-ENCODING: encoding: [0x89,0xb3,0x31,0x70] + +vsubwod.h.b $vr9, $vr12, $vr26 +# CHECK-INST: vsubwod.h.b $vr9, $vr12, $vr26 +# CHECK-ENCODING: encoding: [0x89,0x69,0x24,0x70] + +vsubwod.w.h $vr31, $vr2, $vr10 +# CHECK-INST: vsubwod.w.h $vr31, $vr2, $vr10 +# CHECK-ENCODING: encoding: [0x5f,0xa8,0x24,0x70] + +vsubwod.d.w $vr6, $vr16, $vr15 +# CHECK-INST: vsubwod.d.w $vr6, $vr16, $vr15 +# CHECK-ENCODING: encoding: [0x06,0x3e,0x25,0x70] + +vsubwod.q.d $vr22, $vr0, $vr18 +# CHECK-INST: vsubwod.q.d $vr22, $vr0, $vr18 +# CHECK-ENCODING: encoding: [0x16,0xc8,0x25,0x70] + +vsubwod.h.bu $vr3, $vr17, $vr11 +# CHECK-INST: vsubwod.h.bu $vr3, $vr17, $vr11 +# CHECK-ENCODING: encoding: [0x23,0x2e,0x34,0x70] + +vsubwod.w.hu $vr9, $vr16, $vr26 +# CHECK-INST: vsubwod.w.hu $vr9, $vr16, $vr26 +# CHECK-ENCODING: encoding: [0x09,0xea,0x34,0x70] + +vsubwod.d.wu $vr23, $vr9, $vr8 +# CHECK-INST: vsubwod.d.wu $vr23, $vr9, $vr8 +# CHECK-ENCODING: encoding: [0x37,0x21,0x35,0x70] + +vsubwod.q.du $vr8, $vr15, $vr7 +# CHECK-INST: vsubwod.q.du $vr8, $vr15, $vr7 +# CHECK-ENCODING: encoding: [0xe8,0x9d,0x35,0x70] diff --git a/llvm/test/MC/LoongArch/lsx/xor.s b/llvm/test/MC/LoongArch/lsx/xor.s new file mode 100644 index 0000000000000000000000000000000000000000..4d8c6eedbf68e05003a02ac9837a75e6c0e705dc --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/xor.s @@ -0,0 +1,8 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc --triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vxor.v $vr28, $vr16, $vr18 +# CHECK-INST: vxor.v $vr28, $vr16, $vr18 +# CHECK-ENCODING: encoding: [0x1c,0x4a,0x27,0x71] diff --git a/llvm/test/MC/LoongArch/lsx/xori.s b/llvm/test/MC/LoongArch/lsx/xori.s new file mode 100644 index 0000000000000000000000000000000000000000..c06fb2179e192d487dcca2b363474139ec8bd7f5 --- /dev/null +++ b/llvm/test/MC/LoongArch/lsx/xori.s @@ -0,0 +1,8 @@ +# RUN: llvm-mc --triple=loongarch64 --show-encoding %s | \ +# RUN: FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +# RUN: llvm-mc 
--triple=loongarch64 --filetype=obj %s | \ +# RUN: llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-INST + +vxori.b $vr13, $vr4, 74 +# CHECK-INST: vxori.b $vr13, $vr4, 74 +# CHECK-ENCODING: encoding: [0x8d,0x28,0xd9,0x73] diff --git a/llvm/test/TableGen/string_ops.td b/llvm/test/TableGen/string_ops.td new file mode 100644 index 0000000000000000000000000000000000000000..68581dd9cd65548c0814fd2ad238dd588c0e26b4 --- /dev/null +++ b/llvm/test/TableGen/string_ops.td @@ -0,0 +1,39 @@ +// RUN: llvm-tblgen %s | FileCheck %s + +class ToLower<string str> { + string s = !tolower(str); +} + +class ToUpper<string str> { + string s = !toupper(str); +} + +// CHECK-LABEL: def LOWER1 { +// CHECK: string s = "str"; +// CHECK: } +def LOWER1: ToLower<"STR">; + +// CHECK-LABEL: def LOWER2 { +// CHECK: string s = "str"; +// CHECK: } +def LOWER2 : ToLower<"Str">; + +// CHECK-LABEL: def LOWER3 { +// CHECK: string s = "str"; +// CHECK: } +def LOWER3 : ToLower<"STr">; + +// CHECK-LABEL: def UPPER1 { +// CHECK: string s = "STR"; +// CHECK: } +def UPPER1 : ToUpper<"str">; + +// CHECK-LABEL: def UPPER2 { +// CHECK: string s = "STR"; +// CHECK: } +def UPPER2 : ToUpper<"sTr">; + +// CHECK-LABEL: def UPPER3 { +// CHECK: string s = "STR"; +// CHECK: } +def UPPER3 : ToUpper<"sTR">; \ No newline at end of file diff --git a/llvm/test/Transforms/GlobalOpt/globalvar-code-model.ll b/llvm/test/Transforms/GlobalOpt/globalvar-code-model.ll new file mode 100644 index 0000000000000000000000000000000000000000..276a49474f73f4dd40584c443af3f40de07b8c7c --- /dev/null +++ b/llvm/test/Transforms/GlobalOpt/globalvar-code-model.ll @@ -0,0 +1,11 @@ +; RUN: opt -passes=globalopt -S < %s | FileCheck %s + +@G = internal global i32 5, code_model "large" + +define i32 @test() norecurse { + %a = load i32, ptr @G + store i32 4, ptr @G + ret i32 %a +} + +; CHECK: @G = internal unnamed_addr global i1 false, code_model "large" diff --git a/llvm/test/Transforms/LoopVectorize/LoongArch/defaults.ll b/llvm/test/Transforms/LoopVectorize/LoongArch/defaults.ll new file mode 100644 index 0000000000000000000000000000000000000000..dcddc90b241c44070f1532e85140891f28305e8c --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/LoongArch/defaults.ll @@ -0,0 +1,65 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -passes=loop-vectorize -mtriple loongarch64-linux-gnu -mattr=+lasx,+auto-vec -S | FileCheck %s + +;; This is a collection of tests whose only purpose is to show changes in the +;; default configuration. Please keep these tests minimal - if you're testing +;; functionality of some specific configuration, please place that in a +;; separate test file with a hard coded configuration (even if that +;; configuration is the current default).
+ +target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128" +target triple = "loongarch64" + +define void @vector_add(ptr noalias nocapture %a, i64 %v) { +; CHECK-LABEL: @vector_add( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[V:%.*]], i32 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8 +; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i64> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: store <4 x i64> [[TMP3]], ptr [[TMP2]], align 8 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 +; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, 1024 +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] +; CHECK-NEXT: [[ELEM:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 +; CHECK-NEXT: [[ADD:%.*]] = add i64 [[ELEM]], [[V]] +; CHECK-NEXT: store i64 [[ADD]], ptr [[ARRAYIDX]], align 8 +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK: for.end: +; CHECK-NEXT: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv + %elem = load i64, ptr %arrayidx + %add = add i64 %elem, %v + store i64 %add, ptr %arrayidx + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, 1024 + br i1 %exitcond.not, label %for.end, label %for.body + +for.end: + ret void +} diff --git a/llvm/test/Transforms/LoopVectorize/LoongArch/lit.local.cfg b/llvm/test/Transforms/LoopVectorize/LoongArch/lit.local.cfg new file mode 100644 index 0000000000000000000000000000000000000000..9570af17fe5f1fc84b1485630bbd7548e0b65d8e --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/LoongArch/lit.local.cfg @@ -0,0 +1,4 @@ +config.suffixes = [".ll"] + +if not "LoongArch" in config.root.targets: + config.unsupported = True diff --git a/llvm/test/tools/llvm-objdump/ELF/LoongArch/branches.s b/llvm/test/tools/llvm-objdump/ELF/LoongArch/branches.s new file mode 100644 index 0000000000000000000000000000000000000000..8cb00aef9954272ca2b95b7fff4dd8819ba064c4 --- /dev/null +++ b/llvm/test/tools/llvm-objdump/ELF/LoongArch/branches.s @@ -0,0 +1,76 @@ +# RUN: llvm-mc 
--triple=loongarch32 --filetype=obj < %s | \ +# RUN: llvm-objdump -d --no-show-raw-insn - | FileCheck %s +# RUN: llvm-mc --triple=loongarch64 --filetype=obj < %s | \ +# RUN: llvm-objdump -d --no-show-raw-insn - | FileCheck %s + +# CHECK-LABEL: <foo>: +foo: +# CHECK: beq $a0, $a1, 108 +beq $a0, $a1, .Llocal +# CHECK: bne $a0, $a1, 104 +bne $a0, $a1, .Llocal +# CHECK: blt $a0, $a1, 100 +blt $a0, $a1, .Llocal +# CHECK: bltu $a0, $a1, 96 +bltu $a0, $a1, .Llocal +# CHECK: bge $a0, $a1, 92 +bge $a0, $a1, .Llocal +# CHECK: bgeu $a0, $a1, 88 +bgeu $a0, $a1, .Llocal +# CHECK: beqz $a0, 84 +beqz $a0, .Llocal +# CHECK: bnez $a0, 80 +bnez $a0, .Llocal +# CHECK: bceqz $fcc6, 76 +bceqz $fcc6, .Llocal +# CHECK: bcnez $fcc6, 72 +bcnez $fcc6, .Llocal + +# CHECK: beq $a0, $a1, 76 +beq $a0, $a1, bar +# CHECK: bne $a0, $a1, 72 +bne $a0, $a1, bar +# CHECK: blt $a0, $a1, 68 +blt $a0, $a1, bar +# CHECK: bltu $a0, $a1, 64 +bltu $a0, $a1, bar +# CHECK: bge $a0, $a1, 60 +bge $a0, $a1, bar +# CHECK: bgeu $a0, $a1, 56 +bgeu $a0, $a1, bar +# CHECK: beqz $a0, 52 +beqz $a0, bar +# CHECK: bnez $a0, 48 +bnez $a0, bar +# CHECK: bceqz $fcc6, 44 +bceqz $fcc6, bar +# CHECK: bcnez $fcc6, 40 +bcnez $fcc6, bar + +# CHECK: b 28 +b .Llocal +# CHECK: b 32 +b bar + +# CHECK: bl 20 +bl .Llocal +# CHECK: bl 24 +bl bar + +# CHECK: jirl $zero, $a0, 4{{$}} +jirl $zero, $a0, 4 +# CHECK: jirl $ra, $a0, 4{{$}} +jirl $ra, $a0, 4 +# CHECK: ret +ret + +.Llocal: +# CHECK: 6c: nop +# CHECK: nop +nop +nop + +# CHECK-LABEL: <bar>: +bar: +# CHECK: 74: nop +nop diff --git a/llvm/test/tools/llvm-objdump/ELF/LoongArch/lit.local.cfg b/llvm/test/tools/llvm-objdump/ELF/LoongArch/lit.local.cfg new file mode 100644 index 0000000000000000000000000000000000000000..cc24278acbb414ab5be93cffabda76082cc18a3a --- /dev/null +++ b/llvm/test/tools/llvm-objdump/ELF/LoongArch/lit.local.cfg @@ -0,0 +1,2 @@ +if not "LoongArch" in config.root.targets: + config.unsupported = True diff --git a/llvm/test/tools/llvm-readobj/ELF/reloc-types-loongarch64.test b/llvm/test/tools/llvm-readobj/ELF/reloc-types-loongarch64.test index e32dc893fa7985d41986aab7c874c25117a87323..88ff7fa405ed95f2aa23507cfd5992f44fce3dc3 100644 --- a/llvm/test/tools/llvm-readobj/ELF/reloc-types-loongarch64.test +++ b/llvm/test/tools/llvm-readobj/ELF/reloc-types-loongarch64.test @@ -102,6 +102,7 @@ # CHECK: Type: R_LARCH_ADD_ULEB128 (107) # CHECK: Type: R_LARCH_SUB_ULEB128 (108) # CHECK: Type: R_LARCH_64_PCREL (109) +# CHECK: Type: R_LARCH_CALL36 (110) --- !ELF FileHeader: @@ -211,3 +212,4 @@ Sections: - Type: R_LARCH_ADD_ULEB128 - Type: R_LARCH_SUB_ULEB128 - Type: R_LARCH_64_PCREL + - Type: R_LARCH_CALL36 diff --git a/llvm/tools/lto/lto.exports b/llvm/tools/lto/lto.exports index 3abae5f0fcbafdebf25243d5d0965e8f26f5f377..4164c3919a97fa5e96dd28a7aa3c6a05a8e3a33a 100644 --- a/llvm/tools/lto/lto.exports +++ b/llvm/tools/lto/lto.exports @@ -45,12 +45,6 @@ lto_codegen_compile_optimized lto_codegen_set_should_internalize lto_codegen_set_should_embed_uselists lto_set_debug_options -LLVMCreateDisasm -LLVMCreateDisasmCPU -LLVMDisasmDispose -LLVMDisasmInstruction -LLVMSetDisasmOptions -LLVMCreateDisasmCPUFeatures thinlto_create_codegen thinlto_codegen_dispose thinlto_codegen_add_module diff --git a/llvm/tools/remarks-shlib/CMakeLists.txt b/llvm/tools/remarks-shlib/CMakeLists.txt index f22cedd9ead780421ea43cd6a6ad02505aba003e..d8686c743931273c6d30f10b29a6e25293b8b84d 100644 --- a/llvm/tools/remarks-shlib/CMakeLists.txt +++ b/llvm/tools/remarks-shlib/CMakeLists.txt @@ -9,7 +9,9 @@ if(LLVM_ENABLE_PIC) libremarks.cpp ) + if (NOT
(BUILD_SHARED_LIBS OR LLVM_LINK_LLVM_DYLIB)) set(LLVM_EXPORTED_SYMBOL_FILE ${CMAKE_CURRENT_SOURCE_DIR}/Remarks.exports) + endif() add_llvm_library(Remarks SHARED INSTALL_WITH_TOOLCHAIN ${SOURCES}) diff --git a/llvm/unittests/Object/ELFTest.cpp b/llvm/unittests/Object/ELFTest.cpp index 35fc2ec698fbd7093109200479d7734a7fc781ff..33c1986989f0257a7f1a2455f9d351443e182695 100644 --- a/llvm/unittests/Object/ELFTest.cpp +++ b/llvm/unittests/Object/ELFTest.cpp @@ -251,6 +251,8 @@ TEST(ELFTest, getELFRelocationTypeNameForLoongArch) { getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_SUB_ULEB128)); EXPECT_EQ("R_LARCH_64_PCREL", getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_64_PCREL)); + EXPECT_EQ("R_LARCH_CALL36", + getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_CALL36)); } TEST(ELFTest, getELFRelativeRelocationType) { diff --git a/llvm/unittests/Target/LoongArch/CMakeLists.txt b/llvm/unittests/Target/LoongArch/CMakeLists.txt index 04c0efdc927acf3d38be98c410f5474a481b1ec5..b9561b58343286fc28405beba6247490e38e6fb3 100644 --- a/llvm/unittests/Target/LoongArch/CMakeLists.txt +++ b/llvm/unittests/Target/LoongArch/CMakeLists.txt @@ -18,6 +18,7 @@ set(LLVM_LINK_COMPONENTS add_llvm_target_unittest(LoongArchTests InstSizes.cpp + MCInstrAnalysisTest.cpp ) set_property(TARGET LoongArchTests PROPERTY FOLDER "Tests/UnitTests/TargetTests") diff --git a/llvm/unittests/Target/LoongArch/InstSizes.cpp b/llvm/unittests/Target/LoongArch/InstSizes.cpp index a3adb926006976d607f10bebab9c9b0a976277a5..2dc2c6088ae77d725badd3e52a597abe9207e34f 100644 --- a/llvm/unittests/Target/LoongArch/InstSizes.cpp +++ b/llvm/unittests/Target/LoongArch/InstSizes.cpp @@ -139,3 +139,18 @@ TEST(InstSizes, AtomicPseudo) { EXPECT_EQ(44u, II.getInstSizeInBytes(*I)); }); } + +TEST(InstSizes, StatePoint) { + std::unique_ptr<LLVMTargetMachine> TM = createTargetMachine(); + std::unique_ptr<LoongArchInstrInfo> II = createInstrInfo(TM.get()); + + runChecks( + TM.get(), II.get(), " declare zeroext i1 @return_i1()\n", + // clang-format off + " STATEPOINT 0, 0, 0, target-flags(loongarch-call-plt) @return_i1, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, implicit-def $r3, implicit-def $r4\n", + // clang-format on + [](LoongArchInstrInfo &II, MachineFunction &MF) { + auto I = MF.begin()->begin(); + EXPECT_EQ(4u, II.getInstSizeInBytes(*I)); + }); +} diff --git a/llvm/unittests/Target/LoongArch/MCInstrAnalysisTest.cpp b/llvm/unittests/Target/LoongArch/MCInstrAnalysisTest.cpp new file mode 100644 index 0000000000000000000000000000000000000000..468ee79615d643c953255a8cc7f0450087a4b218 --- /dev/null +++ b/llvm/unittests/Target/LoongArch/MCInstrAnalysisTest.cpp @@ -0,0 +1,121 @@ +//===- MCInstrAnalysisTest.cpp - LoongArchMCInstrAnalysis unit tests ------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCInstrAnalysis.h" +#include "MCTargetDesc/LoongArchMCTargetDesc.h" +#include "llvm/MC/MCInstBuilder.h" +#include "llvm/MC/TargetRegistry.h" +#include "llvm/Support/TargetSelect.h" + +#include "gtest/gtest.h" + +#include <memory> + +using namespace llvm; + +namespace { + +class InstrAnalysisTest : public testing::TestWithParam<const char *> { +protected: + std::unique_ptr<const MCInstrInfo> Info; + std::unique_ptr<const MCInstrAnalysis> Analysis; + + static void SetUpTestSuite() { + LLVMInitializeLoongArchTargetInfo(); + LLVMInitializeLoongArchTarget(); + LLVMInitializeLoongArchTargetMC(); + } + + InstrAnalysisTest() { + std::string Error; + const Target *TheTarget = + TargetRegistry::lookupTarget(Triple::normalize(GetParam()), Error); + Info = std::unique_ptr<const MCInstrInfo>(TheTarget->createMCInstrInfo()); + Analysis = std::unique_ptr<const MCInstrAnalysis>( + TheTarget->createMCInstrAnalysis(Info.get())); + } +}; + +} // namespace + +static MCInst beq() { + return MCInstBuilder(LoongArch::BEQ) + .addReg(LoongArch::R0) + .addReg(LoongArch::R1) + .addImm(32); +} + +static MCInst b() { return MCInstBuilder(LoongArch::B).addImm(32); } + +static MCInst bl() { return MCInstBuilder(LoongArch::BL).addImm(32); } + +static MCInst jirl(unsigned RD, unsigned RJ = LoongArch::R10) { + return MCInstBuilder(LoongArch::JIRL).addReg(RD).addReg(RJ).addImm(16); +} + +TEST_P(InstrAnalysisTest, IsTerminator) { + EXPECT_TRUE(Analysis->isTerminator(beq())); + EXPECT_TRUE(Analysis->isTerminator(b())); + EXPECT_FALSE(Analysis->isTerminator(bl())); + EXPECT_TRUE(Analysis->isTerminator(jirl(LoongArch::R0))); + EXPECT_FALSE(Analysis->isTerminator(jirl(LoongArch::R5))); +} + +TEST_P(InstrAnalysisTest, IsCall) { + EXPECT_FALSE(Analysis->isCall(beq())); + EXPECT_FALSE(Analysis->isCall(b())); + EXPECT_TRUE(Analysis->isCall(bl())); + EXPECT_TRUE(Analysis->isCall(jirl(LoongArch::R1))); + EXPECT_FALSE(Analysis->isCall(jirl(LoongArch::R0))); +} + +TEST_P(InstrAnalysisTest, IsReturn) { + EXPECT_FALSE(Analysis->isReturn(beq())); + EXPECT_FALSE(Analysis->isReturn(b())); + EXPECT_FALSE(Analysis->isReturn(bl())); + EXPECT_TRUE(Analysis->isReturn(jirl(LoongArch::R0, LoongArch::R1))); + EXPECT_FALSE(Analysis->isReturn(jirl(LoongArch::R0))); + EXPECT_FALSE(Analysis->isReturn(jirl(LoongArch::R1))); +} + +TEST_P(InstrAnalysisTest, IsBranch) { + EXPECT_TRUE(Analysis->isBranch(beq())); + EXPECT_TRUE(Analysis->isBranch(b())); + EXPECT_FALSE(Analysis->isBranch(bl())); + EXPECT_TRUE(Analysis->isBranch(jirl(LoongArch::R0))); + EXPECT_FALSE(Analysis->isBranch(jirl(LoongArch::R1))); + EXPECT_FALSE(Analysis->isBranch(jirl(LoongArch::R0, LoongArch::R1))); +} + +TEST_P(InstrAnalysisTest, IsConditionalBranch) { + EXPECT_TRUE(Analysis->isConditionalBranch(beq())); + EXPECT_FALSE(Analysis->isConditionalBranch(b())); + EXPECT_FALSE(Analysis->isConditionalBranch(bl())); +} + +TEST_P(InstrAnalysisTest, IsUnconditionalBranch) { + EXPECT_FALSE(Analysis->isUnconditionalBranch(beq())); + EXPECT_TRUE(Analysis->isUnconditionalBranch(b())); + EXPECT_FALSE(Analysis->isUnconditionalBranch(bl())); + EXPECT_TRUE(Analysis->isUnconditionalBranch(jirl(LoongArch::R0))); + EXPECT_FALSE(Analysis->isUnconditionalBranch(jirl(LoongArch::R1))); + EXPECT_FALSE( + Analysis->isUnconditionalBranch(jirl(LoongArch::R0, LoongArch::R1))); +} + +TEST_P(InstrAnalysisTest, IsIndirectBranch) { + EXPECT_FALSE(Analysis->isIndirectBranch(beq())); + EXPECT_FALSE(Analysis->isIndirectBranch(b())); + 
EXPECT_FALSE(Analysis->isIndirectBranch(bl())); + EXPECT_TRUE(Analysis->isIndirectBranch(jirl(LoongArch::R0))); + EXPECT_FALSE(Analysis->isIndirectBranch(jirl(LoongArch::R1))); + EXPECT_FALSE(Analysis->isIndirectBranch(jirl(LoongArch::R0, LoongArch::R1))); +} + +INSTANTIATE_TEST_SUITE_P(LA32And64, InstrAnalysisTest, + testing::Values("loongarch32", "loongarch64")); diff --git a/llvm/utils/kate/llvm-tablegen.xml b/llvm/utils/kate/llvm-tablegen.xml index 2a3f0403236634ae1ce248fc4e8c842178fb1778..496894452c46ea417e07b53b239526b98a9affa1 100644 --- a/llvm/utils/kate/llvm-tablegen.xml +++ b/llvm/utils/kate/llvm-tablegen.xml @@ -41,6 +41,8 @@ <item> !ge </item> <item> !gt </item> <item> !ne </item> + <item> !tolower </item> + <item> !toupper </item> class